コード例 #1
0
ファイル: suite_generator.py プロジェクト: felipecruz/thc
def main(file_path):
    """Generate a ``suite.c`` test runner next to the C file at *file_path*.

    The file is read with every line that starts with ``#include`` dropped,
    parsed with ``CParser`` and walked with ``MethodVisitor``. The visitor's
    ``test_functions`` are passed to ``build_main`` and the returned code is
    written to ``suite.c`` in the directory that contains *file_path*.

    Args:
        file_path: '/'-separated path of the C test file. A bare file name
            is resolved against the current directory (".").
    """
    # Split on the LAST separator so nested paths such as "a/b/test.c" give
    # ("a/b", "test.c") rather than the first two path components.
    tests_dir, separator, file_name = file_path.rpartition('/')
    if not separator:
        tests_dir = "."

    # Lines starting with "#include" are skipped before the text is parsed.
    with open(file_path, "rt") as source_file:
        file_content = "".join(line for line in source_file
                               if not line.startswith("#include"))

    ast = CParser().parse(file_content, file_name, debuglevel=0)

    test_finder = MethodVisitor()
    test_finder.visit(ast)

    include_text = ""

    suite_code = build_main(test_finder.test_functions,
                            file_name,
                            include_text)

    # The context manager closes the output file even if the write fails.
    with open("/".join([tests_dir, "suite.c"]), "wt") as suite_file:
        suite_file.write(suite_code)
コード例 #2
0
ファイル: c_types.py プロジェクト: ethteck/mips3c
def parse_c(source: str,
            initial_scope: CParserScope) -> Tuple[ca.FileAST, CParserScope]:
    """Parse *source* starting from a copy of *initial_scope*.

    This mirrors what `CParser.parse()` does, except that `_scope_stack` is
    seeded from the caller so that scope state can be carried across several
    parsed files.

    Returns:
        A pair ``(ast, scope)`` where ``scope`` is a copy of the bottom entry
        of the parser's scope stack after parsing.

    Raises:
        DecompFailure: if the parser raises ``ParseError``; the message holds
            the parser's message plus, when available, the line/column and
            the offending source line.
    """
    c_parser = CParser()
    lexer = c_parser.clex
    lexer.filename = "<source>"
    lexer.reset_lineno()
    c_parser._scope_stack = [initial_scope.copy()]
    c_parser._last_yielded_token = None

    try:
        ast = c_parser.cparser.parse(input=source, lexer=lexer)
    except ParseError as err:
        # The error text has the form "<position>: <message>".
        position, msg = str(err).split(": ", 1)
        parts = position.split(":")
        posstr = ""
        if len(parts) >= 2:
            # Adjust the line number by 1 to correct for the added typedefs
            lineno = int(parts[1]) - 1
            posstr = f" at line {lineno}"
            if len(parts) >= 3:
                posstr += f", column {parts[2]}"
            try:
                posstr += "\n\n" + source.split("\n")[lineno].rstrip()
            except IndexError:
                posstr += "(out of bounds?)"
        raise DecompFailure(
            f"Syntax error when parsing C context.\n{msg}{posstr}")

    return ast, c_parser._scope_stack[0].copy()
コード例 #3
0
def backward_call(decl):
    """Build a C wrapper definition that forwards to the declared function.

    *decl* is C source whose first top-level node is a function declaration.
    The result has the form ``<rtype> <short_name>(<typed params>) { return
    <name>(<param names>); }`` where ``<short_name>`` is the declared name
    with its first ``len('cephes_ ')`` characters removed.

    An ``AssertionError`` is raised when the return type or a parameter type
    consists of more than one name.
    """
    func = CParser().parse(decl, filename='<stdin>').ext[0]
    name = func.name
    params = func.type.args.params

    return_names = func.type.type.type.names
    if len(return_names) > 1:
        assert False
    else:
        rtype = return_names[0]

    head = rtype + ' ' + name[len('cephes_ '):] + '('

    declared = []
    forwarded = []
    for param in params:
        param_names = param.type.type.names
        if len(param_names) > 1:
            assert False
        declared.append(param_names[0] + ' ' + param.name)
        forwarded.append(param.name)

    call_expr = name + '(' + ', '.join(forwarded) + ')'
    return head + ', '.join(declared) + ')' + " { return %s; }" % call_expr
コード例 #4
0
ファイル: parser.py プロジェクト: felipecruz/qc
def parse(file_content, file_name):
    """Parse C source text and run an ``ASTVisitor`` over the resulting AST.

    Args:
        file_content: C source code to parse.
        file_name: file name handed to ``CParser.parse`` together with the
            source text.

    Returns:
        The ``ASTVisitor`` instance after it has visited the whole AST.
    """
    ast = CParser().parse(file_content, file_name, debuglevel=0)

    test_finder = ASTVisitor()
    test_finder.visit(ast)

    return test_finder
コード例 #5
0
ファイル: cparse.py プロジェクト: wodelover/mlib
def parse_cstruct(code, cls=None):
    """Create a structure class from the last declaration in *code*.

    *code* is parsed after ``PREPEND_TYPES``; the last top-level child of the
    AST is taken as the struct, and the remaining children are used to fill
    the module-level ``TYPE_TRL_TABLE`` the first time it is found empty.

    Args:
        code: C source whose final top-level declaration is the struct.
        cls: optional base class; ``c.Structure`` is used when it is falsy.

    Returns:
        A new class named after the struct with ``_fields_`` set from
        ``get_fields``.
    """
    global TYPE_TRL_TABLE
    tree = CParser().parse(PREPEND_TYPES + '\n' + code)
    children = list(tree.children())
    # children() entries are indexed here as (name, node) pairs.
    struct_node = children.pop()[1]
    if not TYPE_TRL_TABLE:
        TYPE_TRL_TABLE = mk_trltable(children)

    namespace = {'_fields_': get_fields(struct_node)}
    base = cls or c.Structure
    return type(struct_node.name, (base, ), namespace)
コード例 #6
0
ファイル: inspect.py プロジェクト: vberlier/narmock
    def __init__(self, source_code, functions=None, keep_args=""):
        """Initialise tokenizer and parser state for *source_code*.

        Args:
            source_code: text that is stored and passed to ``self.tokenize``.
            functions: stored as-is on ``self.functions`` (default ``None``).
            keep_args: regular-expression source; it is wrapped in ``^...$``
                and compiled into ``self.keep_args``.
        """
        self.source_code = source_code
        self.functions = functions
        self.token_stream = self.tokenize(source_code)
        self.previous = None
        self.current = None

        self.bracket_stack = []
        self.source_context = []
        # Starts with one typedef for __builtin_va_list; presumably so that
        # declarations using it parse without compiler builtins -- confirm.
        self.typedefs = ["typedef int __builtin_va_list;"]

        self.cparser = CParser()

        # Anchored at both ends; with the default "" the pattern is "^$".
        self.keep_args = re.compile(f"^{keep_args}$")
コード例 #7
0
def parse(text, filename='', parser=None, fake_typedefs=False):
    """Parse C source and strip everything up to the fake-typedef marker.

    Args:
        text: C source code to parse.
        filename: name passed to the parser and, when *fake_typedefs* is
            set, written into a ``# 1 "<filename>"`` line directive.
        parser: parser object to use; a new ``CParser`` is created when it
            is ``None``.
        fake_typedefs: when true, ``fake.typedefs`` and the line directive
            are placed in front of *text* before parsing.

    Returns:
        The parsed AST. If a ``Typedef`` named ``__end_of_fakes__`` is found
        in ``ast.ext``, it and every node before it are removed. When no
        such marker exists the AST is returned unchanged.
    """
    if parser is None:
        parser = CParser()

    if fake_typedefs:
        text = ''.join((fake.typedefs, f'# 1 "{filename}"\n', text))

    ast = parser.parse(text, filename)
    # Delete only when the marker is actually present: falling through the
    # loop must not wipe the whole translation unit, and an empty ``ext``
    # must not fail on an unbound loop index.
    for index, node in enumerate(ast.ext):
        if isinstance(node, c_ast.Typedef) and node.name == '__end_of_fakes__':
            del ast.ext[:index + 1]
            break

    return ast
コード例 #8
0
 def p_pointer(self, p):
     """ pointer : TIMES type_qualifier_list_opt
                 | TIMES type_qualifier_list_opt pointer
                 | AND type_qualifier_list_opt
                 | AND type_qualifier_list_opt pointer
     """
     # NOTE(review): the docstring above looks like a yacc-style grammar
     # production (it adds AND alternatives next to TIMES) and is presumably
     # read at runtime by the parser generator -- keep it verbatim.
     # The reduction itself is delegated to the base implementation.
     return CParser.p_pointer(self, p)
コード例 #9
0
 def __init__(self,
              name,
              headers,
              soname,
              guard,
              decls,
              wrappers=(),
              optional=(),
              *,
              fail_log_level='warning'):
     """Parse *decls* and prepare the Jinja2 environment used for rendering.

     Args:
         name: stored on the instance and exposed to templates as ``name``.
         headers: iterable copied into a list; exposed as ``headers``.
         soname: stored and exposed to templates as ``soname``.
         guard: stored and exposed to templates as ``guard``.
         decls: C declarations parsed with ``CParser``; the nodes collected
             by ``Visitor`` are stored as ``self.nodes``.
         wrappers: iterable kept as a list on the instance and exposed to
             templates as a set.
         optional: iterable exposed to templates as a set.
         fail_log_level: exposed to templates as ``fail_log_level``.
     """
     self.name = name
     self.headers = list(headers)
     self.soname = soname
     self.guard = guard

     tree = CParser().parse(decls)
     collector = Visitor()
     collector.visit(tree)
     self.nodes = collector.nodes
     self.wrappers = list(wrappers)

     # Templates are loaded from the directory that contains this module.
     env = jinja2.Environment(autoescape=False,
                              trim_blocks=True,
                              loader=jinja2.FileSystemLoader(
                                  os.path.dirname(__file__)))
     self.environment = env
     env.filters.update({
         'gen': gen_node,
         'rename': rename_func,
         'ptr': make_func_ptr,
         'args': func_args,
     })
     env.globals.update({
         'name': self.name,
         'headers': self.headers,
         'soname': self.soname,
         'guard': self.guard,
         'nodes': self.nodes,
         'wrappers': set(wrappers),
         'optional': set(optional),
         'fail_log_level': fail_log_level,
     })
コード例 #10
0
ファイル: template.py プロジェクト: isabella232/fffc
 def __new__(cls):
     """Create an instance, preparing the class-level template state first.

     While ``cls.text`` is falsy the template text is loaded with
     ``cls._load_template()``, a ``CParser`` and a ``CGenerator`` are stored
     on the class, and the parsed template is kept as ``cls.saved_ast``.
     """
     if cls.text:
         # Template state already prepared: only allocate the instance.
         return super().__new__(cls)

     cls.text = cls._load_template()
     cls.parser = CParser()
     cls.generator = CGenerator()
     cls.saved_ast = cls.parser.parse(cls.text)
     return super().__new__(cls)
コード例 #11
0
    def build(self, data_dir):
        """Build the vocabulary and encoded data from *data_dir*.

        Sub-directories named ``1`` to ``104`` of *data_dir* are scanned.
        Every file in them is read, passed through ``comment_remover``,
        parsed, and walked with ``CodeToWordVisitor``; the visitor's
        ``pre_order`` sequence is added to the vocabulary and its encoded
        form is stored in ``self.data`` under ``(directory, file number)``.

        Args:
            data_dir: path of the directory holding the numbered
                sub-directories.
        """
        # A single parser is reused for every file: constructing one is
        # loop-invariant setup work.
        parser = CParser()
        for i in range(1, 105):
            data_subdir = data_dir + "/" + str(i)
            for file_name in os.listdir(data_subdir):
                # The last four characters are dropped and the remainder is
                # read as an integer (assumes names like "<number>.txt" --
                # TODO confirm the extension).
                num = int(file_name[:-4])
                name = data_subdir + "/" + file_name
                with open(name, errors="ignore") as f:
                    code = f.read()
                # The file is closed before the parsing work starts.
                ast = parser.parse(comment_remover(code))
                visitor = CodeToWordVisitor()
                visitor.visit(ast)
                seq = visitor.pre_order
                self._add_words(seq)
                self.data[(i, num)] = self._words2data(seq)
            print("Directory {} built".format(i))

        print("Vocabulary Size: {}".format(len(self.dictionary)))
コード例 #12
0
def api_fdecls(decl):
    """Return a C prototype string for the function declared in *decl*.

    *decl* is C source whose first top-level node is a function declaration.
    The result has the form ``<rtype> ncephes_<_rcs(name)>(<typed params>);``.

    An ``AssertionError` is raised when the return type or a parameter type
    consists of more than one name.
    """
    func = CParser().parse(decl, filename='<stdin>').ext[0]
    name = func.name
    params = func.type.args.params

    return_names = func.type.type.type.names
    if len(return_names) > 1:
        assert False
    else:
        rtype = return_names[0]

    head = rtype + ' ncephes_' + _rcs(name) + '('

    declared = []
    for param in params:
        param_names = param.type.type.names
        if len(param_names) > 1:
            assert False
        declared.append(param_names[0] + ' ' + param.name)

    return head + ', '.join(declared) + ');'
コード例 #13
0
def test_tokens(f):
    """Print every token the parser's lexer produces for the file *f*.

    The content is obtained with ``smart_read(f)`` and fed to the lexer of a
    ``CParser`` created with ``yacc_debug=True``. Tokens are printed until
    the lexer returns a falsy value.
    """
    parser = CParser(yacc_debug=True)
    lexer = parser.clex
    lexer.filename = f
    lexer.reset_lineno()
    lexer.input(smart_read(f))

    while True:
        token = lexer.token()
        if not token:
            break
        print(token)
コード例 #14
0
ファイル: parse_code.py プロジェクト: klemniops/linoone
def parse_file(filename,
               use_cpp=False,
               cpp_path='cpp',
               cpp_args='',
               parser=None):
    """ Parse a C file and return its AST.

        filename:
            Name of the file to parse.

        use_cpp:
            When True, the file is first run through the C pre-processor
            (via preprocess_file) and the output is parsed instead of the
            raw file content.

        cpp_path:
            Only used when use_cpp is True: path of the 'cpp' executable.
            The default 'cpp' requires it to be reachable through PATH.

        cpp_args:
            Only used when use_cpp is True: command line arguments for cpp,
            either one string or a list of strings. Mind the quoting - a
            raw string is the safest choice, for example:
            r'-I../utils/fake_libc_include'

        parser:
            Optional parser object used in place of a fresh CParser.

        Returns the AST on success. A ParseError can be raised when the
        file does not parse.

        Errors from cpp will be printed out.
    """
    if not use_cpp:
        with io.open(filename) as source:
            text = source.read()
    else:
        text = preprocess_file(filename, cpp_path, cpp_args)

    active_parser = CParser() if parser is None else parser
    return active_parser.parse(text, filename)
コード例 #15
0
    def flush(self):
        """Reset the instance to a fresh preprocessor, parser and empty buffers."""
        self.pre = pcpp.Preprocessor()
        # Clearing line_directive presumably stops the preprocessor from
        # emitting line markers in its output -- confirm against pcpp.
        self.pre.line_directive = None

        self.cparse = CParser()
        # Disabled: pre-parsing typedefs for the fixed-width integer names.
        # self.cparse.parse(
        #     """
        #     typedef int uint8_t;
        #     typedef int uint16_t;
        #     typedef int uint32_t;
        #     typedef int uint64_t;
        #     typedef int int8_t;
        #     typedef int int16_t;
        #     typedef int int32_t;
        #     typedef int int64_t;
        #     """, "", 7)

        self.cdata = ""
        self.last_processed = ""
コード例 #16
0
    def __init__(self, source_code, functions, rename_parameters_file=None):
        """Initialise all parsing state, run ``self.parse()`` and verify the result.

        Args:
            source_code: text that is stored and passed to ``self.tokenize``.
            functions: collection of function names to mock; stored on
                ``self.functions``.
            rename_parameters_file: optional file handed to
                ``load_param_names``; its result is kept in
                ``self.param_names``.

        Raises:
            Exception: if ``self.functions`` is still non-empty once
                ``self.parse()`` has returned; each remaining name is
                reported on stderr first.
        """
        self.source_code = source_code
        self.functions = functions
        self.token_stream = self.tokenize(source_code)
        self.previous = None
        self.current = None
        self.current_file = None

        self.chunks_to_erase = []
        self.bracket_stack = []
        self.source_context = []
        self.typedefs_code = ['typedef int __builtin_va_list;']
        self.typedefs = {}
        self.structs_code = []
        self.structs = []
        self.struct_typedefs = []
        self.includes = []

        self.cparser = CParser()
        self.param_names = None

        if rename_parameters_file is not None:
            self.param_names = load_param_names(rename_parameters_file)

        self.func_names = []
        self.func_signatures = []
        self.file_ast = None
        self.mocked_functions = []
        # All attributes above are set before parse() runs.
        self.parse()

        # Presumably parse() removes every function it finds a declaration
        # for, so whatever is left is undeclared -- verify in parse().
        if self.functions:
            for function in sorted(functions):
                print(
                    f"error: Mocked function '{function}' undeclared. Add "
                    "missing include in the test file.",
                    file=sys.stderr)

            raise Exception(
                'Unable to find declarations of all mocked functions. Add missing '
                'include(s) in the test file.')
コード例 #17
0
def compile(code):
	"""Translate a single C statement into a callable.

	*code* is wrapped in ``void runner() { ... ; }`` and prefixed with a
	``typedef void <name>;`` line for each short type name (u8 ... f128),
	presumably so the parser accepts those names as types. The body of the
	first function definition must contain exactly one child, which is
	translated with ``AstTranslator``.

	Returns a function ``run(ctu)`` that evaluates the translated
	expression with ``SexpRunner(ctu)`` and passes the result to ``bare``.
	"""
	parser = CParser()

	short_types = 'u8 i8 u16 i16 u32 i32 u64 i64 f32 f64 f128'.split(' ')
	wrapped = 'void runner() { ' + code + ' ; }'
	# Each typedef is put in front of the text, so the last name comes first.
	typedefs = ''.join('typedef void %s; ' % alias for alias in reversed(short_types))

	tree = parser.parse(typedefs + wrapped)
	found = next(
		(child for _, child in tree.children() if isinstance(child, FuncDef)),
		None)

	assert found is not None
	assert len(found.body.children()) == 1

	statement = found.body.children()[0][1]
	sexp = AstTranslator().process(statement)

	def run(ctu):
		return bare(SexpRunner(ctu).run(sexp))
	return run
コード例 #18
0
ファイル: extensiblecparser.py プロジェクト: malsyned/pfstest
    def __init__(self,
                 storage_class_specifiers=[],
                 type_qualifiers=[],
                 type_specifiers=[],
                 function_specifiers=[],
                 lexer_base=CLexer,
                 **kwargs):
        """Create a parser extended with additional keywords.

        Args:
            storage_class_specifiers: extra names registered through
                ``add_rule`` for the ``storage_class_specifier`` rule.
            type_qualifiers: extra names for the ``type_qualifier`` rule.
            type_specifiers: extra names for the
                ``type_specifier_no_typeid`` rule.
            function_specifiers: extra names for the ``function_specifier``
                rule.
            lexer_base: lexer class that the per-instance lexer class is
                derived from (default ``CLexer``).
            **kwargs: forwarded to ``CParser.__init__``; ``lexer``,
                ``lex_optimize`` and ``yacc_optimize`` are overwritten.

        NOTE: the list defaults are shared between calls; this method only
        passes them on to ``add_rule``.
        """
        # Derive the lexer class from the caller-supplied base. Previously
        # CLexer was hard-coded here and ``lexer_base`` was silently ignored.
        self.lexer_class = type("ExtensibleCLexer", (lexer_base, ), dict())

        self.add_rule("storage_class_specifier", storage_class_specifiers,
                      self.p_storage_class_specifier)
        self.add_rule("type_qualifier", type_qualifiers, self.p_type_qualifier)
        self.add_rule("function_specifier", function_specifiers,
                      self.p_function_specifier)
        self.add_rule("type_specifier_no_typeid", type_specifiers,
                      self.p_type_specifier_no_typeid)

        kwargs['lexer'] = self.lexer_class
        # Optimized table generation is switched off for both lexer and parser.
        kwargs['lex_optimize'] = False
        kwargs['yacc_optimize'] = False
        # For grammar debugging, additionally set kwargs['yacc_debug'] = True.

        CParser.__init__(self, **kwargs)
コード例 #19
0
def parse_c(source: str) -> ca.FileAST:
    """Parse *source* with a fresh ``CParser`` and return the AST.

    Raises:
        DecompFailure: if the parser raises ``ParseError``; the message holds
            the parser's message plus, when available, the line/column and
            the offending source line.
    """
    try:
        return CParser().parse(source, "<source>")
    except ParseError as err:
        # The error text has the form "<position>: <message>".
        position, msg = str(err).split(": ", 1)
        parts = position.split(":")
        posstr = ""
        if len(parts) >= 2:
            # Adjust the line number by 1 to correct for the added typedefs
            lineno = int(parts[1]) - 1
            posstr = f" at line {lineno}"
            if len(parts) >= 3:
                posstr += f", column {parts[2]}"
            try:
                posstr += "\n\n" + source.split("\n")[lineno].rstrip()
            except IndexError:
                posstr += "(out of bounds?)"
        raise DecompFailure(f"Syntax error when parsing C context.\n{msg}{posstr}")
コード例 #20
0
ファイル: BaseTest.py プロジェクト: AnisBoubaker/etslint
class BaseTest(unittest.TestCase):
    """Base class for rule tests: parses ``_tested_code`` and checks reports.

    Subclasses are expected to set ``_tested_code`` and ``_rule_instance``
    before calling ``_run_rule``. The parser and the reporter are class
    attributes and therefore shared by all tests.
    """

    # C source that _run_rule() parses.
    _tested_code = ""
    # AST produced by the most recent _run_rule() call.
    _ast = None
    # Shared parser instance.
    _parser = CParser()
    # Shared reporter; it is cleared after every expectation check.
    _reporter = NoopReporter()
    # Rule under test; its visit() method receives the AST.
    _rule_instance = None

    def _run_rule(self):
        """Parse ``_tested_code`` and let the rule visit the resulting AST."""
        self._ast = self._parser.parse(self._tested_code, "no_file.c")
        self._rule_instance.visit(self._ast)

    def expect_error(self, message=""):
        """Assert that the rule produced at least one report, then clear them."""
        try:
            self.assertTrue(
                len(self._reporter.get_reports_by_rule(self._rule_instance)) > 0,
                message)
        finally:
            # Clear even when the assertion fails, so reports collected by
            # this test cannot leak into later tests via the shared reporter.
            self._reporter.clear()

    def expect_no_error(self, message=""):
        """Assert that the rule produced no report, then clear the reporter."""
        try:
            self.assertTrue(
                len(self._reporter.get_reports_by_rule(self._rule_instance)) == 0,
                message)
        finally:
            # See expect_error(): the shared reporter must always be reset.
            self._reporter.clear()
コード例 #21
0
ファイル: gen_ibv_loader.py プロジェクト: ajameson/spead2
def main(argv):
    """Render the header or the C++ source and print it to stdout.

    Args:
        argv: command line; ``argv[1]`` must be ``'header'`` or ``'cxx'``.

    Returns:
        0 on success, 1 when the arguments are invalid (a usage message is
        then written to stderr).
    """
    # Validate first so that a bad invocation does no parsing or rendering.
    if len(argv) != 2 or argv[1] not in {'header', 'cxx'}:
        # ``file`` belongs to print(), not to str.format().
        print('Usage: {} header|cxx'.format(argv[0]), file=sys.stderr)
        return 1

    environment = jinja2.Environment(autoescape=False, trim_blocks=True)
    environment.filters['gen'] = gen_node
    environment.filters['rename'] = rename_func
    environment.filters['ptr'] = make_func_ptr
    environment.filters['args'] = func_args

    ast = CParser().parse(INPUT)
    visitor = Visitor()
    visitor.visit(ast)

    # Only the requested template is compiled and rendered.
    template_source = HEADER if argv[1] == 'header' else CXX
    template = environment.from_string(template_source)
    print(template.render(nodes=visitor.nodes))
    return 0
コード例 #22
0
def cparse(text, types, filename='', debuglevel=0) -> CType:
    """Parse *text* with the keys of *types* pre-registered as typedef names.

    Args:
        text: C source text to parse.
        types: mapping whose keys are registered with the parser as typedef
            names before parsing.
        filename: name assigned to the lexer.
        debuglevel: passed to the underlying parser as ``debug``.

    Returns:
        ``CType.from_ast`` applied to the parse result.

    Raises:
        RuntimeError: if parsing raises any ``Exception``; the original
            error is chained as the cause.
    """
    parser = CParser()
    lexer = parser.clex

    lexer.filename = filename
    lexer.reset_lineno()
    parser._last_yielded_token = None
    parser._scope_stack = [{}]

    # Each name gets the pair (index, 0) as its second argument.
    for position, entry in enumerate(types.items()):
        parser._add_typedef_name(entry[0], (position, 0))

    try:
        result = parser.cparser.parse(input=text, lexer=lexer, debug=debuglevel)
    except Exception as e:
        raise RuntimeError(f'Could not parse `{text}`') from e

    return CType.from_ast(result)
コード例 #23
0
ファイル: z.py プロジェクト: cplab/pycparser
        method = 'visit_' + node.__class__.__name__
        visitor = getattr(self, method, self.generic_visit)
        return visitor(node)

    def visit_FuncCall(self, node):
        """Print *node* and the node stored in ``self.current_parent``.

        Nothing here visits the children of *node*, so traversal stops at
        a FuncCall.
        """
        print("Visiting FuncCall")
        # The return value of show() is what gets printed here.
        print(node.show())
        print('---- parent ----')
        print(self.current_parent.show())

    def generic_visit(self, node):
        """ Fallback used when no dedicated visit method exists for
            a node: visits each item of node.children() with
            current_parent set to the node, then restores the
            previous parent.
        """
        previous = self.current_parent
        self.current_parent = node
        for child in node.children():
            self.visit(child)
        self.current_parent = previous


if __name__ == "__main__":
    # Sample input: a function whose body is two adjacent wide string
    # literals.
    source_code = r'''void foo() {
    L"hi" L"there";
}
    '''

    # Parse the sample and dump the AST with coordinates, attribute names
    # and node names enabled.
    parser = CParser()
    ast = parser.parse(source_code, filename='zz')
    ast.show(showcoord=True, attrnames=True, nodenames=True)
コード例 #24
0
ファイル: match_functions.py プロジェクト: xcode2010/ghcc
def match_functions(
        repo_info: RepoInfo,
        archive_folder: str,
        temp_folder: str,
        decompile_folder: str,
        use_fake_libc_headers: bool = True,
        preprocess_timeout: Optional[int] = None,
        *,
        progress_bar: Optional[flutes.ProgressBarManager.Proxy] = None
) -> Result:
    """Match decompiled functions of one repository with their original code.

    The repository is extracted from its archive (or cloned), every
    Makefile target is re-run with preprocessing-only flags, and each
    preprocessed file is parsed. For every file that has a decompiler
    output ``<sha>.jsonl`` in *decompile_folder*, functions found in both
    the original and the decompiled code are collected as
    ``MatchedFunction`` records.

    Args:
        repo_info: repository description; ``repo_owner``, ``repo_name``,
            ``makefiles`` and ``idx`` are read.
        archive_folder: folder searched for ``<owner>/<name>.tar.gz``.
        temp_folder: folder in which a per-repository working directory is
            created; that directory is removed before returning.
        decompile_folder: folder holding the ``<sha>.jsonl`` files.
        use_fake_libc_headers: when true, ``-nostdlib -I/usr/src/libc`` is
            added to the compiler flags and ``ghcc.parse.FAKE_LIBC_PATH``
            is mapped to ``/usr/src/libc``.
        preprocess_timeout: passed on as ``compile_timeout``.
        progress_bar: optional progress reporter.

    Returns:
        A ``Result`` with the matched functions, the preprocessed original
        code keyed by hash, and the file/function counters. If cloning
        fails, a ``Result`` with empty collections and zero counters is
        returned instead.
    """
    # Directions:
    # 1. Clone or extract from archive.
    # 2. For each Makefile, rerun the compilation process with the flag "-E", so only the preprocessor is run.
    #    This probably won't take long as the compiler exits after running the processor, and linking would fail.
    #    Also, consider using "-nostdlib -Ipath/to/fake_libc_include" as suggested by `pycparser`.
    # 3. The .o files are now preprocessed C code. Parse them using `pycparser` to obtain a list of functions.

    start_time = time.time()
    total_files = sum(
        len(makefile) for makefile in repo_info.makefiles.values())
    repo_folder_name = f"{repo_info.repo_owner}_____{repo_info.repo_name}"
    repo_full_name = f"{repo_info.repo_owner}/{repo_info.repo_name}"
    archive_path = (Path(archive_folder) /
                    f"{repo_full_name}.tar.gz").absolute()
    repo_dir = (Path(temp_folder) / repo_folder_name).absolute()
    repo_src_path = repo_dir / "src"
    repo_binary_dir = repo_dir / "bin"
    # parents=True also creates repo_dir itself.
    repo_binary_dir.mkdir(parents=True, exist_ok=True)
    has_error = False

    if progress_bar is not None:
        worker_id = flutes.get_worker_id()
        process_name = f"Worker {worker_id}" if worker_id is not None else "Main Process"
        progress_bar.new(total=total_files,
                         desc=process_name + f" [{repo_full_name}]")

    flutes.log(f"Begin processing {repo_full_name} ({total_files} files)")

    if os.path.exists(archive_path):
        # Extract archive
        flutes.run_command(["tar", f"xzf", str(archive_path)],
                           cwd=str(repo_dir))
        (repo_dir / repo_folder_name).rename(repo_src_path)
    else:
        # Clone repo
        if repo_src_path.exists():
            shutil.rmtree(repo_src_path)
        ret = ghcc.clone(repo_info.repo_owner,
                         repo_info.repo_name,
                         clone_folder=str(repo_dir),
                         folder_name="src")
        if ret.error_type not in [None, ghcc.CloneErrorType.SubmodulesFailed]:
            flutes.log(
                f"Failed to clone {repo_full_name}: error type {ret.error_type}",
                "error")
            # Return a dummy result so this repo is ignored in the future.
            return Result(repo_info.repo_owner, repo_info.repo_name, [], {}, 0,
                          0, 0)

    # Write makefile info to pickle
    with (repo_binary_dir / "makefiles.pkl").open("wb") as f_pkl:
        pickle.dump(repo_info.makefiles, f_pkl)

    gcc_flags = "-E"
    directory_mapping = None
    if use_fake_libc_headers:
        gcc_flags = f"-E -nostdlib -I/usr/src/libc"
        directory_mapping = {ghcc.parse.FAKE_LIBC_PATH: "/usr/src/libc"}

    if progress_bar is not None:
        progress_bar.update(postfix={"status": "preprocessing"})
    makefiles = ghcc.docker_batch_compile(
        str(repo_binary_dir),
        str(repo_src_path),
        compile_timeout=preprocess_timeout,
        gcc_override_flags=gcc_flags,
        use_makefile_info_pkl=True,
        directory_mapping=directory_mapping,
        user_id=(repo_info.idx % 10000) + 30000,  # user IDs 30000 ~ 39999
        exception_log_fn=functools.partial(exception_handler,
                                           repo_info=repo_info))

    # The same parser, lexer wrapper and extractor are reused for all files.
    parser = CParser(lexer=ghcc.parse.CachedCLexer)
    lexer = ghcc.parse.LexerWrapper()
    decompile_path = Path(decompile_folder)
    extractor = ghcc.parse.FunctionExtractor()
    matched_functions: List[MatchedFunction] = []
    preprocessed_original_code: Dict[str, str] = {}
    files_found = 0
    functions_found = 0
    for makefile in makefiles:
        mkfile_dir = Path(makefile['directory'])
        for path, sha in zip(makefile["binaries"], makefile["sha256"]):
            # Load and parse preprocessed original code.
            code_path = str(mkfile_dir / path)
            json_path = decompile_path / (sha + ".jsonl")
            preprocessed_code_path = repo_binary_dir / sha
            if progress_bar is not None:
                progress_bar.update(1, postfix={"file": code_path})
            if not json_path.exists() or not preprocessed_code_path.exists():
                continue
            try:
                with preprocessed_code_path.open("r") as f:
                    code = f.read()
                code = LINE_CONTROL_REGEX.sub("", code)
            except UnicodeDecodeError:
                continue  # probably a real binary file
            preprocessed_original_code[sha] = code
            try:
                original_ast: ASTNode = parser.parse(code,
                                                     filename=os.path.join(
                                                         repo_full_name, path))
            except (pycparser.c_parser.ParseError, AssertionError) as e:
                # For some reason `pycparser` uses `assert`s in places where there should have been a check.
                flutes.log(
                    f"{repo_full_name}: Parser error when processing file "
                    f"{code_path} ({sha}): {str(e)}", "error")
                has_error = True
                continue  # ignore parsing errors
            original_tokens = ghcc.parse.convert_to_tokens(
                code, parser.clex.cached_tokens)
            files_found += 1
            function_asts = extractor.find_functions(original_ast)
            functions_found += len(function_asts)

            # Collect decompiled functions with matching original code.
            with json_path.open("r") as f:
                decompiled_json = [
                    line for line in f if line
                ]  # don't decode, as we only need the function name
            decompiled_funcs: Dict[str,
                                   str] = {}  # (func_name) -> decompiled_code
            decompiled_var_names: Dict[str, Dict[str, Tuple[str, str]]] = {} \
                # (func_name) -> (var_id) -> (decomp_name, orig_name)

            for line_num, j in enumerate(decompiled_json):
                # Find function name from JSON line without parsing.
                match = JSON_FUNC_NAME_REGEX.search(j)
                assert match is not None
                func_name = match.group(1)
                # Only functions that also exist in the original code are kept.
                if func_name not in function_asts:
                    continue

                try:
                    decompiled_data = json.loads(j)
                except json.JSONDecodeError as e:
                    flutes.log(
                        f"{repo_full_name}: Decode error when reading JSON file at {json_path}: "
                        f"{str(e)}", "error")
                    continue
                decompiled_code = decompiled_data["raw_code"]
                # Store the variable names used in the function.
                # We use a random string as the identifier prefix. Sadly, C89 (and `pycparser`) doesn't support Unicode.
                for length in range(3, 10 + 1):
                    var_identifier_prefix = "v" + "".join(
                        random.choices(string.ascii_lowercase, k=length))
                    if var_identifier_prefix not in decompiled_code:
                        break
                else:
                    # No way this is happening, right?
                    flutes.log(
                        f"{repo_full_name}: Could not find valid identifier prefix for "
                        f"{func_name} in {code_path} ({sha})", "error")
                    continue
                variables: Dict[str, Tuple[str, str]] = {
                }  # (var_id) -> (decompiled_name, original_name)
                for match in DECOMPILED_VAR_REGEX.finditer(decompiled_code):
                    var_id, decompiled_name, original_name = match.groups()
                    var_id = f"{var_identifier_prefix}_{var_id}"
                    if var_id in variables:
                        assert variables[var_id] == (decompiled_name,
                                                     original_name)
                    else:
                        variables[var_id] = (decompiled_name, original_name)
                decompiled_var_names[func_name] = variables
                # Remove irregularities in decompiled code to make the it parsable:
                # - Replace `@@VAR` with special identifiers (literally anything identifier that doesn't clash).
                # - Remove the register allocation indication in `var@<rdi>`.
                decompiled_code = DECOMPILED_VAR_REGEX.sub(
                    rf"{var_identifier_prefix}_\1", decompiled_code)
                decompiled_code = DECOMPILED_REG_ALLOC_REGEX.sub(
                    "", decompiled_code)
                if func_name.startswith("_"):
                    # For some reason, Hexrays would chomp off one leading underscore from function names in their
                    # generated code, which might lead to corrupt code (`_01inverse` -> `01inverse`). Here we
                    # heuristically try to find and replace the changed function name.
                    decompiled_code = re.sub(  # replace all identifiers with matching name
                        r"(?<![a-zA-Z0-9_])" + func_name[1:] +
                        r"(?![a-zA-Z0-9_])", func_name, decompiled_code)
                    # Note that this doesn't fix references of the function in other functions. But really, why would
                    # someone name their function `_01inverse`?
                decompiled_funcs[func_name] = decompiled_code

            # Generate code replacing original functions with decompiled functions.
            replacer = ghcc.parse.FunctionReplacer(decompiled_funcs)
            replaced_code = replacer.visit(original_ast)

            # Obtain AST for decompiled code by parsing it again.
            code_to_preprocess = DECOMPILED_CODE_HEADER + "\n" + replaced_code
            try:
                code_to_parse = ghcc.parse.preprocess(code_to_preprocess)
            except ghcc.parse.PreprocessError as e:
                msg = (
                    f"{repo_full_name}: GCC return value nonzero for decompiled code of "
                    f"{code_path} ({sha})")
                if len(e.args) > 0:
                    msg += ":\n" + str(e)
                flutes.log(msg, "error")
                has_error = True
                continue

            # The `except` branch below is the fallback without decompiled
            # ASTs; the `else` branch handles a successful parse.
            try:
                decompiled_ast, code_to_parse = ghcc.parse.parse_decompiled_code(
                    code_to_parse, lexer, parser)
                decompiled_tokens = ghcc.parse.convert_to_tokens(
                    code_to_parse, parser.clex.cached_tokens)
            except (ValueError, pycparser.c_parser.ParseError) as e:
                flutes.log(
                    f"{repo_full_name}: Could not parse decompiled code for "
                    f"{code_path} ({sha}): {str(e)}", "error")
                has_error = True

                # We don't have ASTs for decompiled functions, but we can still dump the code.
                # Use the dummy typedefs to extract functions.
                code_lines = code_to_parse.split("\n")
                func_begin_end: Dict[str, List[Optional[int]]] = defaultdict(
                    lambda: [None, None])
                for idx, line in enumerate(code_lines):
                    name, is_begin = replacer.extract_func_name(line)
                    if name is not None:
                        func_begin_end[name][0 if is_begin else 1] = idx
                for func_name, (begin, end) in func_begin_end.items():
                    if begin is not None and end is not None and func_name in function_asts:
                        decompiled_func_tokens = lexer.lex("\n".join(
                            code_lines[(begin + 1):end]))
                        original_func_ast = function_asts[func_name]
                        original_ast_json, original_func_tokens = serialize(
                            original_func_ast, original_tokens)
                        matched_func = MatchedFunction(
                            file_path=code_path,
                            binary_hash=sha,
                            func_name=func_name,
                            variable_names=decompiled_var_names[func_name],
                            original_tokens=original_func_tokens,
                            decompiled_tokens=decompiled_func_tokens,
                            original_ast_json=original_ast_json,
                            decompiled_ast_json=None)
                        matched_functions.append(matched_func)

            else:
                # We've successfully parsed decompiled code.
                decompiled_func_asts = extractor.find_functions(decompiled_ast)
                for func_name in decompiled_funcs.keys():
                    original_func_ast = function_asts[func_name]
                    if func_name not in decompiled_func_asts:
                        # Maybe there's other Hexrays-renamed functions that we didn't fix, just ignore them.
                        continue
                    decompiled_func_ast = decompiled_func_asts[func_name]
                    original_ast_json, original_func_tokens = serialize(
                        original_func_ast, original_tokens)
                    decompiled_ast_json, decompiled_func_tokens = serialize(
                        decompiled_func_ast, decompiled_tokens)
                    matched_func = MatchedFunction(
                        file_path=code_path,
                        binary_hash=sha,
                        func_name=func_name,
                        variable_names=decompiled_var_names[func_name],
                        original_tokens=original_func_tokens,
                        decompiled_tokens=decompiled_func_tokens,
                        original_ast_json=original_ast_json,
                        decompiled_ast_json=decompiled_ast_json)
                    matched_functions.append(matched_func)

    # Cleanup the folders; if errors occurred, keep the preprocessed code.
    # Status: "success" = no error and at least one match; "error" = errors
    # and no match; "warning" = every other combination.
    # NOTE(review): repo_dir is removed unconditionally below, although the
    # comment above mentions keeping it on errors -- confirm the intent.
    status = ("success" if not has_error and len(matched_functions) > 0 else (
        "warning" if not has_error or len(matched_functions) > 0 else "error"))
    shutil.rmtree(repo_dir)

    end_time = time.time()
    funcs_without_asts = sum(matched_func.decompiled_ast_json is None
                             for matched_func in matched_functions)
    flutes.log(
        f"[{end_time - start_time:6.2f}s] "
        f"{repo_full_name}: "
        f"Files found: {files_found}/{total_files}, "
        f"functions matched: {len(matched_functions)}/{functions_found} "
        f"({funcs_without_asts} w/o ASTs)",
        status,
        force_console=True)
    return Result(repo_owner=repo_info.repo_owner,
                  repo_name=repo_info.repo_name,
                  matched_functions=matched_functions,
                  preprocessed_original_code=preprocessed_original_code,
                  files_found=files_found,
                  functions_found=functions_found,
                  funcs_without_asts=funcs_without_asts)
コード例 #25
0
from bil import BILVisitor
from bf_out import BrainfuckVisitor


__author__ = 'Michael Storm'


def print_bytecode(bytecode):
    """Print a bytecode listing to stdout, one entry per line.

    An entry whose first element is ``'label'`` is printed as ``<name>:``
    using its second element. Every other entry is printed tab-indented,
    with all of its elements converted to strings and joined by spaces.
    """
    for instruction in bytecode:
        if instruction[0] != 'label':
            rendered = ' '.join(map(str, instruction))
            print('\t%s' % rendered)
            continue
        print('%s:' % instruction[1])


# Parser instance used to parse the sample program below.
parser = CParser()

# Sample C source (raw string, so the backslashes reach the C parser
# untouched): a static function that assigns hex character constants to a
# local `char` inside an `if`.
buf = r'''
    static void foo()
    {
        char x;
        if ('\x1') {
            x = '\x5';
            x = '\x3';
        }
    }
'''

# Parse the sample, passing 'x.c' as the second (file name) argument, dump
# the resulting AST, then print a separator line.
c_ast = parser.parse(buf, 'x.c')
c_ast.show()
print("#######")
コード例 #26
0
ファイル: z.py プロジェクト: terry2012/juxta
        method = 'visit_' + node.__class__.__name__
        visitor = getattr(self, method, self.generic_visit)
        return visitor(node)

    def visit_FuncCall(self, node):
        """Debug hook for function-call nodes.

        Prints a banner, the value returned by ``node.show()``, a separator
        and the value returned by ``show()`` on the currently tracked parent
        node. The children of ``node`` are not visited from here.
        """
        print("Visiting FuncCall")
        node_dump = node.show()
        print(node_dump)
        print('---- parent ----')
        parent_dump = self.current_parent.show()
        print(parent_dump)

    def generic_visit(self, node):
        """ Fallback used when no ``visit_<ClassName>`` method exists.

            Visits every item returned by ``node.children()`` in order
            (preorder), with ``self.current_parent`` pointing at ``node``
            for the duration of the walk; the previous parent is put back
            once all children have been visited.
        """
        saved_parent, self.current_parent = self.current_parent, node
        for child in node.children():
            self.visit(child)
        self.current_parent = saved_parent


if __name__ == "__main__":
    # Tiny translation unit containing two adjacent wide string literals.
    sample = r'''void foo() {
    L"hi" L"there";
}
    '''

    # Parse the sample under the dummy file name 'zz' and dump the tree with
    # coordinates, attribute names and node names all switched on.
    tree = CParser().parse(sample, filename='zz')
    tree.show(nodenames=True, attrnames=True, showcoord=True)
コード例 #27
0
ファイル: demo_pycparser.py プロジェクト: vpoulailleau/ppci
#!/usr/bin/python
""" A demo showing the usage of the preprocessor with pycparsing """

import argparse
import io
from ppci.api import preprocess
from pycparser.c_parser import CParser

if __name__ == '__main__':
    # Command line: a single positional argument naming the C source file.
    cli = argparse.ArgumentParser()
    cli.add_argument('source', help='C source file')
    filename = cli.parse_args().source

    # Run the ppci preprocessor, collecting its output in memory.
    preprocessed = io.StringIO()
    with open(filename, 'r') as raw_source:
        preprocess(raw_source, preprocessed)
    source = preprocessed.getvalue()

    # Hand the preprocessed text to pycparser and dump the resulting AST.
    tree = CParser().parse(source, filename)
    tree.show()
コード例 #28
0
ファイル: cpp_parser.py プロジェクト: mabuchilab/NiceLib
 def __init__(self, **kwds):
     """Initialise the underlying ``CParser`` with this module's lexer.

     The ``lexer``, ``lextab`` and ``yacctab`` keyword arguments are always
     overridden; any other keyword arguments are passed through unchanged.
     """
     kwds.update(
         lexer=CPPLexer,
         lextab='nicelib.parser.lextab',
         yacctab='nicelib.parser.yacctab',
     )
     CParser.__init__(self, **kwds)
コード例 #29
0
ファイル: ffi_mock.py プロジェクト: IslamAbdullatif/CanTp
 def __init__(self, source_string):
     """Parse ``source_string`` as C and walk the resulting AST.

     The ``static``, ``extern`` and ``locals`` sets start out empty before
     the parsed tree is handed to ``self.visit``.
     """
     self.static, self.extern, self.locals = set(), set(), set()
     tree = CParser().parse(source_string)
     self.visit(tree)
コード例 #30
0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import copy
from collections import OrderedDict
from pycparser.c_parser import CParser
from pycparser.c_generator import CGenerator
from pycparser.c_ast import NodeVisitor
from pycparser import c_ast

# Module-level pycparser parser and C code generator, created once at import
# time. `_parser` is the instance convert_to_ast() parses with.
_parser = CParser()
_generator = CGenerator()


def convert_to_ast(expression):
    """Parse a C expression given as text and return its AST node.

    pycparser cannot parse a bare expression directly, so the text is wrapped
    as the only statement of a dummy function named ``placeholder``; the node
    for that statement is then pulled back out of the parsed tree.

    :param expression: Text formatted into the dummy function body
    :return: First block item of the dummy function's body
    """
    wrapped_source = 'int placeholder(){{{};}}'.format(expression)
    dummy_function = _parser.parse(wrapped_source).ext[0]
    return dummy_function.body.block_items[0]


def is_node_equal(node_1, node_2):
    """ check if two expression AST nodes are equal since pycparser doesn't provide such property
    :param node_1: First expression node
    :param node_2: Second expression node
    :return: Boolean
コード例 #31
0
ファイル: ffi_mock.py プロジェクト: IslamAbdullatif/CanTp
 def __init__(self, interface, local, extern):
     """Build the header text for ``interface``.

     ``local`` and ``extern`` are iterables of objects with a ``name``
     attribute; their names are stored as sets. ``mocked`` starts empty, and
     ``string`` holds whatever ``self.visit`` returns for the parsed
     ``interface`` source.
     """
     super(CFFIHeader, self).__init__()
     self.locals = {decl.name for decl in local}
     self.extern = {decl.name for decl in extern}
     self.mocked = set()
     interface_ast = CParser().parse(interface)
     self.string = self.visit(interface_ast)
コード例 #32
0
def print_header(message):
    """Print C declarations for the type descriptions contained in ``message``.

    ``message`` is JSON text that decodes to a mapping from type name to an
    info dict; the code below reads ``info['kind']`` plus kind-specific keys
    such as ``'fields'``, ``'tupleElements'``, ``'enumCases'`` and ``'size'``.

    Output goes to stdout in three stages:

    1. A fixed prologue (``#pragma pack(1)`` and two typedefs).
    2. One pass over all types. Kinds handled with a plain ``typedef`` are
       printed immediately; kinds that need members get a pycparser ``Struct``
       built from ``make_decl`` calls and stored in ``ctypes``.
    3. The stored struct typedefs, printed in the order yielded by
       ``top_sort(req_graph)``.

    The names ``pointer_size``, ``script``, ``remove_generic`` and
    ``top_sort`` are not defined in this function and must be available from
    an enclosing scope.
    """
    generator = CGenerator()
    parser = CParser()

    def del_spaces(name):
        """Rewrite ``(extension in X):Y`` and ``(Name in _ID)`` forms in ``name``."""
        # len('(extension in ') == 14, so name[14:idx] is the text between
        # that prefix and the closing '):'.
        if name.startswith('(extension in '):
            idx = name.index('):')
            name = '_extension_in_' + name[14:idx] + "__" + name[idx + 2:]

        # file private types
        if ' in _' in name:
            idx = name.index(' in _')
            end = name.index(')', idx)
            start = name.rindex('(', None, idx)
            namespace = name[:start]
            # A '>' in the prefix is treated as a generic type: the prefix
            # without its last character is mangled and '.' is appended again.
            if '>' in namespace:
                namespace = mangle_name(namespace[:-1]) + '.'
            # Keep only the text between '(' and ' in _', dropping the rest of
            # the parenthesised part.
            name = namespace + name[start + 1:idx] + name[end + 1:]
        return name

    def mangle_name(human):
        """Return the C identifier used for the type named ``human``.

        Looks the type up in ``types`` (raises ``KeyError`` for unknown names
        other than the special-cased pointer names and ``'()'``).
        """
        if human in ('void*', 'voidp', 'Metadata*'):
            return human
        if human == '()':
            return 'void'

        info = types[human]
        if 'getGenericParams' in info and info['getGenericParams']:
            name = remove_generic(human)
        else:
            name = human

        if name.startswith('?Unknown type of'):
            name = name.replace('?Unknown type of ', 'XXX_unknown_type_of_')

        # NOTE(review): str(hash(...))[1:] drops the first character of the
        # decimal hash (the sign or a digit). Python randomises str hashes per
        # process unless PYTHONHASHSEED is fixed, so these names are
        # presumably not stable between runs — confirm that is acceptable.
        if name.startswith("Static #"):
            spl = name.split(' ', 4)
            return "_static_no" + spl[1][1:] + "_in_" + spl[
                3] + "__func" + str(hash(spl[4]))[1:]
        name = del_spaces(name)

        outp = f'swift_{info["kind"]}__'

        if info['kind'] == "Tuple":
            elems = []
            for e in info['tupleElements']:
                name = mangle_name(e['type'])
                if e['label']:
                    name += "__as_" + e['label']
                elems.append(name)
            outp += "with__" + "__and__".join(elems)
        elif info['kind'] == "Existential":
            protos = []
            for p in info['protocols']:
                protos.append(
                    del_spaces(script.exports.demangle(p)).replace(".", "__"))
            if info['isClassBounded']:
                protos.append("Swift__AnyObject")
            if protos:
                outp += "conforming_to__" + "__and__".join(protos)
            else:
                outp += "Any"
            if info.get('getSuperclassConstraint'):
                outp += "__inheriting_from_" + mangle_name(
                    info['getSuperclassConstraint'])
        elif info['kind'] == 'Function':
            # Must produce the same name as the 'Function' typedef printed in
            # the main loop below (both use hash(name) within one process).
            return "func_" + str(hash(name))[1:]
        else:
            outp += name.replace(".", "_")

        # Generic arguments are appended as mangled names of their own.
        if 'getGenericParams' in info and info['getGenericParams']:
            gen_params = [
                mangle_name(param) for param in info['getGenericParams']
            ]
            outp += "__of__" + "__and__".join(gen_params)

        return outp

    def make_decl(name, offset, type_name):
        """Append a member declaration to ``decls`` for the type being built.

        ``offset`` may be an int or a ``'0x...'`` hex string; ``-1`` means the
        offset is unknown, in which case ``prev_end`` is left untouched. A
        ``char _paddingN[...]`` member is inserted first when ``offset`` lies
        beyond the end of the previous member.
        """
        nonlocal decls, pad_count, parser, prev_end

        if isinstance(offset, str):
            assert offset[:2] == '0x'
            offset = int(offset, 16)

        # Fill the gap between the previous member and this one.
        if prev_end < offset:
            pad_str = f"char _padding{pad_count}[{offset - prev_end}];"
            decls.append(parser.parse(pad_str).ext[0])
            pad_count += 1

        type_decl = TypeDecl(name.replace(".", "__"), None,
                             IdentifierType([mangle_name(type_name)]))
        decls.append(Decl(None, None, None, None, type_decl, None, None))

        # Record that the type currently being emitted uses `type_name`.
        req_graph.setdefault(type_name, set()).add(parent_name)

        if offset != -1:
            # Pointer members use the global pointer size; everything else
            # takes its size (a hex string) from the type table.
            size = pointer_size if type_name.endswith('*') else int(
                types[type_name]['size'], 16)
            prev_end = offset + size

    #print("#include <stdint.h>")
    print("#pragma pack(1)")
    print("typedef void *voidp;")
    print("typedef struct Metadata_s Metadata;")
    types = json.loads(message)

    # req_graph: type name -> set of type names that have a member of it.
    req_graph = {}
    # Names removed from req_graph before the final ordering pass.
    ptr_types = {'void*', 'voidp', 'Metadata*'}
    # Type name -> TypeDecl whose typedef is printed in the final pass.
    ctypes = {}

    for name, info in types.items():
        # Per-type state shared with make_decl() through `nonlocal`.
        pad_count = 0
        decls = []
        prev_end = 0
        ctype = None
        parent_name = name
        if info['kind'] == "Tuple":
            for i, elem in enumerate(info['tupleElements']):
                # Unlabelled elements are named by position: _0, _1, ...
                make_decl(elem['label'] or f'_{i}', elem['offset'],
                          elem['type'])
            ctype = Struct(mangle_name(name) + "_s", decls)
        elif info['kind'] == "ObjCClassWrapper":
            print(
                f'typedef struct {mangle_name(name)}_s *{mangle_name(name)};')
        elif info['kind'] in ("Struct", "Class"):
            if info['kind'] == 'Class':
                make_decl('_isa', '0x0', 'Metadata*')
                #make_decl('_refCounts', hex(pointer_size), 'size_t')

            for i, field in enumerate(info['fields']):
                make_decl(field['name'], field['offset'], field['type'])
            ctype = Struct(mangle_name(name) + "_s", decls)

            if info['kind'] == 'Class':
                ctype = PtrDecl(None, ctype)
        elif info['kind'] == "Existential":
            if info['isClassBounded'] or info.get(
                    'getSuperclassConstraint'):  # class existential container
                make_decl(f'heap_object', -1, 'void*')
            else:  # opaque existential container
                decls.append(
                    parser.parse("void *heapObjectOrInlineData0;").ext[0])
                for i in range(1, 3):
                    decls.append(
                        parser.parse(
                            "void *nothingOrInlineData{};".format(i)).ext[0])
                make_decl("dynamicType", -1, "Metadata*")
            for i in range(info['witnessTableCount']):
                make_decl(f'_witnessTable{i + 1}', -1, 'void*')
            ctype = Struct(mangle_name(name) + "_s", decls)
        elif info['kind'] in ("Enum", "Optional"):
            if info['enumCases'] and info['enumCases'][0]['name'] is None:
                # C-like enum
                # we don't have case names or values, so just generate a typedef to an int type
                print(
                    f"typedef uint{int(info['size'], 16) * 8}_t {mangle_name(name)};"
                )
            elif len(info['enumCases']) == 0:
                ctype = Struct(mangle_name(name) + "_s", decls)
            elif len(info['enumCases']) == 1 and info['enumCases'][0]['type']:
                make_decl(info['enumCases'][0]['name'], 0,
                          info['enumCases'][0]['type'])
                ctype = Struct(mangle_name(name) + "_s", decls)
            else:
                # Any other enum layout is emitted as a struct wrapping a
                # char array; info["size"] is interpolated verbatim.
                print(
                    f'typedef struct {mangle_name(name)}_s {{ char _data[{info["size"]}]; }} {mangle_name(name)};'
                )
        elif info['kind'] == 'Opaque':
            if 'getCType' in info:
                # NOTE(review): 'int64' is listed twice below; the duplicate
                # is harmless but may hide a missing entry — confirm.
                ctype_names = {
                    'pointer': 'void*',
                    'int8': 'int8_t',
                    'int16': 'int16_t',
                    'int32': 'int32_t',
                    'int64': 'int64_t',
                    'int64': 'int64_t',
                }
                print(
                    f'typedef {ctype_names[info["getCType"]]} {mangle_name(name)};'
                )
            elif name == 'Builtin.NativeObject':
                print(f'typedef void *{mangle_name(name)};')
            else:
                print(f'typedef char {mangle_name(name)}[{info["size"]}];')
        elif info['kind'] == 'Function':
            print(f"typedef void *func_{str(hash(name))[1:]};"
                  )  # TODO: proper names
        else:
            print(f'typedef char {mangle_name(name)}[{info["size"]}];')

        if ctype:
            type_decl = TypeDecl(mangle_name(name), None, ctype)
            ctypes[name] = type_decl
            type_decl_forward = Struct(mangle_name(name) + "_s", [])
            # NOTE(review): type_decl was just built with TypeDecl(...), so
            # this isinstance check looks like it can only be true if TypeDecl
            # derives from PtrDecl. Was `ctype` intended, and should the
            # forward typedef be printed for every struct? Confirm.
            if isinstance(type_decl, PtrDecl):
                ptr_types.add(name)
                type_decl_forward = PtrDecl(None, type_decl_forward)
                print(
                    generator.visit(
                        Typedef(mangle_name(name), None, ['typedef'],
                                type_decl_forward)) + ";")

    # Pointer-like types are dropped from the graph before ordering.
    for name in ptr_types:
        req_graph.pop(name, None)

    # Emit the collected typedefs in the order produced by top_sort()
    # (presumably a topological order over req_graph — confirm).
    for name in top_sort(req_graph):
        if name in ctypes:
            print(f"\n// {name}")
            print(
                generator.visit(
                    Typedef(mangle_name(name), None, ['typedef'],
                            ctypes[name])) + ";")
コード例 #33
0
from unittest import TestCase

from compat import MagicMock

from pycparser.c_parser import CParser
from pycparser.c_generator import CGenerator

from automock import MockGenerator
from automock import MockInfo, ReturnHint
from automock import ArgInfo, ArgHint

from os import path

# CParser() takes about a second to run on my machine, so create it
# only once instead of in setUp() for every test
cparser = CParser()
cgen = CGenerator()
# Result of parsing an empty source string with the shared parser.
emptyast = cparser.parse('')
# Header path assigned to the mocked paths object in setUp() and passed as
# the file name when parsing test snippets.
defaulthname = "../mockable.h"


class MockGeneratorTests(TestCase):
    def setUp(self):
        """Prepare each test: lift the diff size limit and install mock paths.

        ``self.mpaths`` is a ``MagicMock`` whose ``headerpath`` attribute is
        set to the module-level default header name.
        """
        self.maxDiff = None
        mock_paths = MagicMock()
        mock_paths.headerpath = defaulthname
        self.mpaths = mock_paths

    def test_shouldGenerateMockFromOtherwiseEmptyHeader(self):
        # Given
        mgen = MockGenerator(self.mpaths, cgen,
                             cparser.parse("void func1(void);", defaulthname))
コード例 #34
0
    #--------------- Lexing
    #~ def errfoo(msg, a, b):
    #~ printme(msg)
    #~ sys.exit()
    #~ clex = CLexer(errfoo, lambda t: False)
    #~ clex.build()
    #~ clex.input(source_code)

    #~ while 1:
    #~ tok = clex.token()
    #~ if not tok: break

    #~ printme([tok.value, tok.type, tok.lineno, clex.filename, tok.lexpos])

    #--------------- Parsing
    source_code = r'''
    typedef int int8_t;
    typedef signed char int8_t;
    
    
    '''

    parser = CParser(lex_optimize=False, yacc_optimize=False, yacc_debug=True)
    ast = parser.parse(source_code, filename='zz')
    ast.show(showcoord=False, attrnames=True, nodenames=True)
    #~ nv=NodeVisitor()
    #~ nv.visit(ast)

    print('-- done --')
コード例 #35
0
ファイル: parser.py プロジェクト: xcode2010/ghcc
def parse_decompiled_code(code: str,
                          lexer: LexerWrapper,
                          parser: CParser,
                          max_type_fix_tries: int = 10) -> Tuple[ASTNode, str]:
    r"""Parse preprocessed decompiled code, patching in ``typedef``\ s for undefined types.

    Whenever parsing fails with a ``before: <token>`` error, the offending line is lexed again to
    work out which identifier is most likely an undeclared type name. A dummy
    ``typedef int <name>;`` line is then prepended to the code and parsing is retried, until the
    code parses or ``max_type_fix_tries`` parse attempts have been made.

    :raises ValueError: When the retry limit is reached, or when the guessed type has already been
        given a ``typedef`` (i.e., adding it did not resolve the error).
    :raises pycparser.c_parser.ParseError: When the error cannot be attributed to an undefined type.

    :param code: The preprocessed code to parse.
    :param lexer: The lexer used to tokenize the line reported in a parse error.
    :param parser: The parser to use while parsing.
    :param max_type_fix_tries: Maximum number of parse attempts.
    :return: A tuple containing the parsed AST and the (possibly extended) code.
    """
    # Token types after which the reported token itself is taken to be the type name.
    type_follows = ("CONST", "VOLATILE", "RESTRICT", "__CONST",
                    "__RESTRICT", "__EXTENSION__", "COMMA")
    patched_types: Set[str] = set()
    source_lines = code.split("\n")
    for _ in range(max_type_fix_tries):
        try:
            decompiled_ast = parser.parse(code)
        except pycparser.c_parser.ParseError as e:
            parsed_error = PARSE_ERROR_REGEX.match(str(e))
            if parsed_error is None:
                raise
            message = parsed_error.group("msg")
            if not message.startswith("before: "):
                raise
            before_token = remove_prefix(message, "before: ")
            # Line and column in the error message are 1-based.
            error_line = source_lines[int(parsed_error.group("line")) - 1]
            error_pos = int(parsed_error.group("col")) - 1
            tokens = list(lexer.lex_tokens(error_line))
            try:
                error_token_idx = next(
                    position for position, candidate in enumerate(tokens)
                    if candidate.lexpos == error_pos
                    and candidate.value == before_token)
                # Two situations are distinguished:
                # 1. The reported token follows a type qualifier or a comma, e.g.
                #    `int func(const token var)` or `int func(int a, token b)`: the reported token
                #    is itself the unknown type. This is the rare case.
                # 2. Otherwise the type is the closest ID token to the left of the reported token.
                #    It need not be directly adjacent (`(type) token`, `type *token`).
                follows_qualifier = (
                    error_token_idx > 0
                    and tokens[error_token_idx - 1].type in type_follows)
                if follows_qualifier:
                    type_token = tokens[error_token_idx]
                else:
                    type_token = next(
                        earlier
                        for earlier in reversed(tokens[:error_token_idx])
                        if earlier.type == "ID")
            except StopIteration:
                # A StopIteration escaping from here would silently end a caller's for-loop, so
                # surface the original parse error instead.
                raise e from None

            if type_token.value in patched_types:
                raise ValueError(
                    f"Type {type_token.value} already added (types so far: {list(patched_types)})"
                )
            patched_types.add(type_token.value)
            typedef_line = f"typedef int {type_token.value};"
            # Keep the line list in sync with the code so later line numbers stay valid.
            code = "\n".join((typedef_line, code))
            source_lines.insert(0, typedef_line)
        else:
            return decompiled_ast, code
    raise ValueError(f"Type fixes exceeded limit ({max_type_fix_tries})")