def main(file_path):
    dir_and_name = file_path.split('/')
    if len(dir_and_name) > 1:
        tests_dir = dir_and_name[0]
        file_name = dir_and_name[1]
    else:
        tests_dir = "."
        file_name = dir_and_name[0]

    # Strip #include directives so pycparser can parse the file directly.
    file_content = ""
    with open(file_path, "rt") as f:
        lines = f.readlines()
    for line in lines:
        if line.startswith("#include"):
            continue
        file_content += line

    parser = CParser()
    generator = CGenerator()
    ast = parser.parse(file_content, file_name, debuglevel=0)
    test_finder = MethodVisitor()
    test_finder.visit(ast)
    include_text = ""
    suite_code = build_main(test_finder.test_functions, file_name, include_text)
    with open("/".join([tests_dir, "suite.c"]), "wt") as suite_file:
        suite_file.write(suite_code)
def backward_call(decl):
    parser = CParser()
    decl = parser.parse(decl, filename='<stdin>').ext[0]
    name = decl.name
    args = decl.type.args
    nargs = len(args.params)

    if len(decl.type.type.type.names) > 1:
        assert False
    else:
        rtype = decl.type.type.type.names[0]

    # Build a forwarding wrapper: strip the 'cephes_' prefix from the name
    # and emit a one-line function body that calls the original.
    ndecl = rtype + ' ' + name[len('cephes_'):] + '('
    call_expr = name + '('
    for param in args.params:
        if len(param.type.type.names) > 1:
            assert False
        typ = param.type.type.names[0]
        ndecl += typ + ' ' + param.name + ', '
        call_expr += param.name + ', '
    if nargs > 0:
        ndecl = ndecl[:-2]
        call_expr = call_expr[:-2]
    ndecl += ')'
    call_expr += ')'
    ndecl += " { return %s; }" % call_expr
    return ndecl
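# A minimal usage sketch for backward_call; the cephes-style prototype below
# is illustrative, the real input comes from whatever header the caller feeds in.
wrapper = backward_call("double cephes_cosm1(double x);")
# -> "double cosm1(double x) { return cephes_cosm1(x); }"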
def parse(file_content, file_name):
    parser = CParser()
    generator = CGenerator()
    ast = parser.parse(file_content, file_name, debuglevel=0)
    test_finder = ASTVisitor()
    test_finder.visit(ast)
    return test_finder
def parse_cstruct(code, cls=None):
    global TYPE_TRL_TABLE
    cp = CParser()
    st = cp.parse(PREPEND_TYPES + '\n' + code)
    decls = list(st.children())
    mystruct = decls.pop()[1]
    if not TYPE_TRL_TABLE:
        TYPE_TRL_TABLE = mk_trltable(decls)
    fields = {'_fields_': get_fields(mystruct)}
    return type(mystruct.name, (cls or c.Structure, ), fields)
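# A usage sketch, assuming PREPEND_TYPES supplies typedefs such as uint32_t
# and that get_fields maps the parsed declaration to ctypes field pairs; the
# struct definition here is purely illustrative.
Point = parse_cstruct("typedef struct { uint32_t x; uint32_t y; } Point;")
p = Point(x=1, y=2)  # behaves like any ctypes.Structure subclass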
def parse(text, filename='', parser=None, fake_typedefs=False):
    if parser is None:
        parser = CParser()
    if fake_typedefs:
        text = ''.join((fake.typedefs, f'# 1 "{filename}"\n', text))
    ast = parser.parse(text, filename)
    # Drop the fake typedefs (everything up to and including the
    # __end_of_fakes__ marker) from the parsed AST. Deleting only when the
    # marker is found avoids wiping the AST when no fakes were prepended.
    for i, node in enumerate(ast.ext):
        if isinstance(node, c_ast.Typedef) and node.name == '__end_of_fakes__':
            del ast.ext[:i + 1]
            break
    return ast
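# A sketch of the fake-typedef trick this helper relies on, assuming a `fake`
# module whose `typedefs` string ends with the __end_of_fakes__ marker, e.g.:
#
#   fake.typedefs = '''
#   typedef int size_t;
#   typedef int __end_of_fakes__;
#   '''
ast = parse("size_t strlen(const char *s);", fake_typedefs=True)
# ast.ext now starts at the real declaration; the fakes have been stripped.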
def build(self, data_dir):
    for i in range(1, 105):
        data_subdir = data_dir + "/" + str(i)
        for file_name in os.listdir(data_subdir):
            num = int(file_name[:-4])
            name = data_subdir + "/" + file_name
            with open(name, errors="ignore") as f:
                code = f.read()
            parser = CParser()
            ast = parser.parse(comment_remover(code))
            visitor = CodeToWordVisitor()
            visitor.visit(ast)
            seq = visitor.pre_order
            self._add_words(seq)
            self.data[(i, num)] = self._words2data(seq)
        print("Directory {} built".format(i))
    print("Vocabulary Size: {}".format(len(self.dictionary)))
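# comment_remover is assumed above; a common implementation (adapted from the
# widely used Stack Overflow recipe referenced by the pycparser docs) strips
# // and /* */ comments with a regex while leaving string literals intact.
import re

def comment_remover(text):
    def replacer(match):
        s = match.group(0)
        if s.startswith('/'):
            return ' '  # keep token separation where a comment was removed
        return s        # string or char literal: keep as-is
    pattern = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        re.DOTALL | re.MULTILINE)
    return re.sub(pattern, replacer, text)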
def api_fdecls(decl):
    parser = CParser()
    decl = parser.parse(decl, filename='<stdin>').ext[0]
    name = decl.name
    args = decl.type.args
    nargs = len(args.params)

    if len(decl.type.type.type.names) > 1:
        assert False
    else:
        rtype = decl.type.type.type.names[0]

    ndecl = rtype + ' ncephes_' + _rcs(name) + '('
    for param in args.params:
        if len(param.type.type.names) > 1:
            assert False
        typ = param.type.type.names[0]
        ndecl += typ + ' ' + param.name + ', '
    if nargs > 0:
        ndecl = ndecl[:-2]
    return ndecl + ');'
def parse_file(filename, use_cpp=False, cpp_path='cpp', cpp_args='',
               parser=None):
    """ Parse a C file using pycparser.

        filename:
            Name of the file you want to parse.

        use_cpp:
            Set to True if you want to execute the C pre-processor
            on the file prior to parsing it.

        cpp_path:
            If use_cpp is True, this is the path to 'cpp' on your
            system. If no path is provided, it attempts to just
            execute 'cpp', so it must be in your PATH.

        cpp_args:
            If use_cpp is True, set this to the command line arguments
            strings to cpp. Be careful with quotes - it's best to pass
            a raw string (r'') here. For example:
            r'-I../utils/fake_libc_include'
            If several arguments are required, pass a list of strings.

        parser:
            Optional parser object to be used instead of the default CParser

        When successful, an AST is returned. ParseError can be
        thrown if the file doesn't parse successfully.

        Errors from cpp will be printed out.
    """
    if use_cpp:
        text = preprocess_file(filename, cpp_path, cpp_args)
    else:
        with io.open(filename) as f:
            text = f.read()

    if parser is None:
        parser = CParser()
    return parser.parse(text, filename)
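# A typical invocation, assuming pycparser's bundled fake libc headers are
# available at the path below (the project ships them under utils/); gcc -E
# stands in for a standalone cpp.
ast = parse_file('example.c', use_cpp=True, cpp_path='gcc',
                 cpp_args=['-E', r'-Iutils/fake_libc_include'])
ast.show()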
def compile(code):
    parser = CParser()
    # The sized type names are declared as dummy typedefs so pycparser
    # accepts them, and the statement under test is wrapped in a function
    # so it parses as a C translation unit.
    stypes = 'u8 i8 u16 i16 u32 i32 u64 i64 f32 f64 f128'
    code = 'void runner() { ' + code + ' ; }'
    for type in stypes.split(' '):
        code = 'typedef void %s; %s' % (type, code)
    ast = parser.parse(code)
    found = None
    for _, child in ast.children():
        if isinstance(child, FuncDef):
            found = child
            break
    assert found is not None
    assert len(found.body.children()) == 1
    ast = found.body.children()[0][1]
    sexp = AstTranslator().process(ast)

    def run(ctu):
        return bare(SexpRunner(ctu).run(sexp))

    return run
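# For reference, this is (shape-wise) the source the wrapper above hands to
# pycparser for compile("u32 x"); each dummy typedef is prepended in turn,
# so the last type in `stypes` ends up first:
wrapped = ('typedef void f128; typedef void f64; typedef void f32; '
           'typedef void i64; typedef void u64; typedef void i32; '
           'typedef void u32; typedef void i16; typedef void u16; '
           'typedef void i8; typedef void u8; '
           'void runner() { u32 x ; }')
# Only the single statement inside runner() is translated and returned.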
def print_header(message):
    generator = CGenerator()
    parser = CParser()

    def del_spaces(name):
        if name.startswith('(extension in '):
            idx = name.index('):')
            name = '_extension_in_' + name[14:idx] + "__" + name[idx + 2:]
        # file private types
        if ' in _' in name:
            idx = name.index(' in _')
            end = name.index(')', idx)
            start = name.rindex('(', None, idx)
            namespace = name[:start]
            if '>' in namespace:
                namespace = mangle_name(namespace[:-1]) + '.'
            name = namespace + name[start + 1:idx] + name[end + 1:]
        return name

    def mangle_name(human):
        if human in ('void*', 'voidp', 'Metadata*'):
            return human
        if human == '()':
            return 'void'
        info = types[human]
        if 'getGenericParams' in info and info['getGenericParams']:
            name = remove_generic(human)
        else:
            name = human
        if name.startswith('?Unknown type of'):
            name = name.replace('?Unknown type of ', 'XXX_unknown_type_of_')
        if name.startswith("Static #"):
            spl = name.split(' ', 4)
            return ("_static_no" + spl[1][1:] + "_in_" + spl[3] +
                    "__func" + str(hash(spl[4]))[1:])
        name = del_spaces(name)
        outp = f'swift_{info["kind"]}__'
        if info['kind'] == "Tuple":
            elems = []
            for e in info['tupleElements']:
                name = mangle_name(e['type'])
                if e['label']:
                    name += "__as_" + e['label']
                elems.append(name)
            outp += "with__" + "__and__".join(elems)
        elif info['kind'] == "Existential":
            protos = []
            for p in info['protocols']:
                protos.append(
                    del_spaces(script.exports.demangle(p)).replace(".", "__"))
            if info['isClassBounded']:
                protos.append("Swift__AnyObject")
            if protos:
                outp += "conforming_to__" + "__and__".join(protos)
            else:
                outp += "Any"
            if info.get('getSuperclassConstraint'):
                outp += "__inheriting_from_" + mangle_name(
                    info['getSuperclassConstraint'])
        elif info['kind'] == 'Function':
            return "func_" + str(hash(name))[1:]
        else:
            outp += name.replace(".", "_")
        if 'getGenericParams' in info and info['getGenericParams']:
            gen_params = [
                mangle_name(param) for param in info['getGenericParams']
            ]
            outp += "__of__" + "__and__".join(gen_params)
        return outp

    def make_decl(name, offset, type_name):
        nonlocal decls, pad_count, parser, prev_end
        if isinstance(offset, str):
            assert offset[:2] == '0x'
            offset = int(offset, 16)
        if prev_end < offset:
            pad_str = f"char _padding{pad_count}[{offset - prev_end}];"
            decls.append(parser.parse(pad_str).ext[0])
            pad_count += 1
        type_decl = TypeDecl(name.replace(".", "__"), None,
                             IdentifierType([mangle_name(type_name)]))
        decls.append(Decl(None, None, None, None, type_decl, None, None))
        req_graph.setdefault(type_name, set()).add(parent_name)
        if offset != -1:
            size = (pointer_size if type_name.endswith('*')
                    else int(types[type_name]['size'], 16))
            prev_end = offset + size

    #print("#include <stdint.h>")
    print("#pragma pack(1)")
    print("typedef void *voidp;")
    print("typedef struct Metadata_s Metadata;")
    types = json.loads(message)
    req_graph = {}
    ptr_types = {'void*', 'voidp', 'Metadata*'}
    ctypes = {}
    for name, info in types.items():
        pad_count = 0
        decls = []
        prev_end = 0
        ctype = None
        parent_name = name
        if info['kind'] == "Tuple":
            for i, elem in enumerate(info['tupleElements']):
                make_decl(elem['label'] or f'_{i}', elem['offset'],
                          elem['type'])
            ctype = Struct(mangle_name(name) + "_s", decls)
        elif info['kind'] == "ObjCClassWrapper":
            print(f'typedef struct {mangle_name(name)}_s *{mangle_name(name)};')
        elif info['kind'] in ("Struct", "Class"):
            if info['kind'] == 'Class':
                make_decl('_isa', '0x0', 'Metadata*')
                #make_decl('_refCounts', hex(pointer_size), 'size_t')
            for i, field in enumerate(info['fields']):
                make_decl(field['name'], field['offset'], field['type'])
            ctype = Struct(mangle_name(name) + "_s", decls)
            if info['kind'] == 'Class':
                ctype = PtrDecl(None, ctype)
        elif info['kind'] == "Existential":
            if info['isClassBounded'] or info.get('getSuperclassConstraint'):
                # class existential container
                make_decl('heap_object', -1, 'void*')
            else:
                # opaque existential container
                decls.append(
                    parser.parse("void *heapObjectOrInlineData0;").ext[0])
                for i in range(1, 3):
                    decls.append(
                        parser.parse(
                            "void *nothingOrInlineData{};".format(i)).ext[0])
                make_decl("dynamicType", -1, "Metadata*")
            for i in range(info['witnessTableCount']):
                make_decl(f'_witnessTable{i + 1}', -1, 'void*')
            ctype = Struct(mangle_name(name) + "_s", decls)
        elif info['kind'] in ("Enum", "Optional"):
            if info['enumCases'] and info['enumCases'][0]['name'] is None:
                # C-like enum: we don't have case names or values, so just
                # generate a typedef to an int type
                print(f"typedef uint{int(info['size'], 16) * 8}_t {mangle_name(name)};")
            elif len(info['enumCases']) == 0:
                ctype = Struct(mangle_name(name) + "_s", decls)
            elif len(info['enumCases']) == 1 and info['enumCases'][0]['type']:
                make_decl(info['enumCases'][0]['name'], 0,
                          info['enumCases'][0]['type'])
                ctype = Struct(mangle_name(name) + "_s", decls)
            else:
                print(f'typedef struct {mangle_name(name)}_s {{ char _data[{info["size"]}]; }} {mangle_name(name)};')
        elif info['kind'] == 'Opaque':
            if 'getCType' in info:
                ctype_names = {
                    'pointer': 'void*',
                    'int8': 'int8_t',
                    'int16': 'int16_t',
                    'int32': 'int32_t',
                    'int64': 'int64_t',
                }
                print(f'typedef {ctype_names[info["getCType"]]} {mangle_name(name)};')
            elif name == 'Builtin.NativeObject':
                print(f'typedef void *{mangle_name(name)};')
            else:
                print(f'typedef char {mangle_name(name)}[{info["size"]}];')
        elif info['kind'] == 'Function':
            # TODO: proper names
            print(f"typedef void *func_{str(hash(name))[1:]};")
        else:
            print(f'typedef char {mangle_name(name)}[{info["size"]}];')

        if ctype:
            type_decl = TypeDecl(mangle_name(name), None, ctype)
            ctypes[name] = type_decl
            type_decl_forward = Struct(mangle_name(name) + "_s", [])
            if isinstance(type_decl, PtrDecl):
                ptr_types.add(name)
                type_decl_forward = PtrDecl(None, type_decl_forward)
            print(generator.visit(
                Typedef(mangle_name(name), None, ['typedef'],
                        type_decl_forward)) + ";")

    for name in ptr_types:
        req_graph.pop(name, None)
    for name in top_sort(req_graph):
        if name in ctypes:
            print(f"\n// {name}")
            print(generator.visit(
                Typedef(mangle_name(name), None, ['typedef'],
                        ctypes[name])) + ";")
from unittest import TestCase

from compat import MagicMock
from pycparser.c_parser import CParser
from pycparser.c_generator import CGenerator
from automock import MockGenerator
from automock import MockInfo, ReturnHint
from automock import ArgInfo, ArgHint
from os import path

# CParser() takes about a second to run on my machine, so create it
# only once instead of in setUp() for every test
cparser = CParser()
cgen = CGenerator()
emptyast = cparser.parse('')
defaulthname = "../mockable.h"


class MockGeneratorTests(TestCase):
    def setUp(self):
        self.maxDiff = None
        self.mpaths = MagicMock()
        self.mpaths.headerpath = defaulthname

    def test_shouldGenerateMockFromOtherwiseEmptyHeader(self):
        # Given
        mgen = MockGenerator(self.mpaths, cgen,
                             cparser.parse("void func1(void);", defaulthname))

        # When
        mocks = mgen.mocks
def match_functions(
        repo_info: RepoInfo,
        archive_folder: str,
        temp_folder: str,
        decompile_folder: str,
        use_fake_libc_headers: bool = True,
        preprocess_timeout: Optional[int] = None,
        *,
        progress_bar: Optional[flutes.ProgressBarManager.Proxy] = None
) -> Result:
    # Directions:
    # 1. Clone or extract from archive.
    # 2. For each Makefile, rerun the compilation process with the flag "-E",
    #    so only the preprocessor is run. This probably won't take long as
    #    the compiler exits after running the preprocessor, and linking would
    #    fail. Also, consider using "-nostdlib -Ipath/to/fake_libc_include"
    #    as suggested by `pycparser`.
    # 3. The .o files are now preprocessed C code. Parse them using
    #    `pycparser` to obtain a list of functions.
    start_time = time.time()
    total_files = sum(
        len(makefile) for makefile in repo_info.makefiles.values())
    repo_folder_name = f"{repo_info.repo_owner}_____{repo_info.repo_name}"
    repo_full_name = f"{repo_info.repo_owner}/{repo_info.repo_name}"
    archive_path = (Path(archive_folder) /
                    f"{repo_full_name}.tar.gz").absolute()
    repo_dir = (Path(temp_folder) / repo_folder_name).absolute()
    repo_src_path = repo_dir / "src"
    repo_binary_dir = repo_dir / "bin"
    repo_binary_dir.mkdir(parents=True, exist_ok=True)
    has_error = False

    if progress_bar is not None:
        worker_id = flutes.get_worker_id()
        process_name = (f"Worker {worker_id}" if worker_id is not None
                        else "Main Process")
        progress_bar.new(total=total_files,
                         desc=process_name + f" [{repo_full_name}]")
    flutes.log(f"Begin processing {repo_full_name} ({total_files} files)")

    if os.path.exists(archive_path):
        # Extract archive
        flutes.run_command(["tar", "xzf", str(archive_path)],
                           cwd=str(repo_dir))
        (repo_dir / repo_folder_name).rename(repo_src_path)
    else:
        # Clone repo
        if repo_src_path.exists():
            shutil.rmtree(repo_src_path)
        ret = ghcc.clone(repo_info.repo_owner, repo_info.repo_name,
                         clone_folder=str(repo_dir), folder_name="src")
        if ret.error_type not in [None, ghcc.CloneErrorType.SubmodulesFailed]:
            flutes.log(f"Failed to clone {repo_full_name}: "
                       f"error type {ret.error_type}", "error")
            # Return a dummy result so this repo is ignored in the future.
            return Result(repo_info.repo_owner, repo_info.repo_name,
                          [], {}, 0, 0, 0)

    # Write makefile info to pickle
    with (repo_binary_dir / "makefiles.pkl").open("wb") as f_pkl:
        pickle.dump(repo_info.makefiles, f_pkl)

    gcc_flags = "-E"
    directory_mapping = None
    if use_fake_libc_headers:
        gcc_flags = "-E -nostdlib -I/usr/src/libc"
        directory_mapping = {ghcc.parse.FAKE_LIBC_PATH: "/usr/src/libc"}

    if progress_bar is not None:
        progress_bar.update(postfix={"status": "preprocessing"})
    makefiles = ghcc.docker_batch_compile(
        str(repo_binary_dir), str(repo_src_path),
        compile_timeout=preprocess_timeout,
        gcc_override_flags=gcc_flags,
        use_makefile_info_pkl=True,
        directory_mapping=directory_mapping,
        user_id=(repo_info.idx % 10000) + 30000,  # user IDs 30000 ~ 39999
        exception_log_fn=functools.partial(exception_handler,
                                           repo_info=repo_info))

    parser = CParser(lexer=ghcc.parse.CachedCLexer)
    lexer = ghcc.parse.LexerWrapper()
    decompile_path = Path(decompile_folder)
    extractor = ghcc.parse.FunctionExtractor()
    matched_functions: List[MatchedFunction] = []
    preprocessed_original_code: Dict[str, str] = {}
    files_found = 0
    functions_found = 0
    for makefile in makefiles:
        mkfile_dir = Path(makefile['directory'])
        for path, sha in zip(makefile["binaries"], makefile["sha256"]):
            # Load and parse preprocessed original code.
            code_path = str(mkfile_dir / path)
            json_path = decompile_path / (sha + ".jsonl")
            preprocessed_code_path = repo_binary_dir / sha
            if progress_bar is not None:
                progress_bar.update(1, postfix={"file": code_path})
            if not json_path.exists() or not preprocessed_code_path.exists():
                continue
            try:
                with preprocessed_code_path.open("r") as f:
                    code = f.read()
                code = LINE_CONTROL_REGEX.sub("", code)
            except UnicodeDecodeError:
                continue  # probably a real binary file
            preprocessed_original_code[sha] = code
            try:
                original_ast: ASTNode = parser.parse(
                    code, filename=os.path.join(repo_full_name, path))
            except (pycparser.c_parser.ParseError, AssertionError) as e:
                # For some reason `pycparser` uses `assert`s in places where
                # there should have been a check.
                flutes.log(
                    f"{repo_full_name}: Parser error when processing file "
                    f"{code_path} ({sha}): {str(e)}", "error")
                has_error = True
                continue  # ignore parsing errors
            original_tokens = ghcc.parse.convert_to_tokens(
                code, parser.clex.cached_tokens)
            files_found += 1
            function_asts = extractor.find_functions(original_ast)
            functions_found += len(function_asts)

            # Collect decompiled functions with matching original code.
            with json_path.open("r") as f:
                # don't decode, as we only need the function name
                decompiled_json = [line for line in f if line]
            decompiled_funcs: Dict[str, str] = {}  # (func_name) -> decompiled_code
            # (func_name) -> (var_id) -> (decomp_name, orig_name)
            decompiled_var_names: Dict[str, Dict[str, Tuple[str, str]]] = {}
            for line_num, j in enumerate(decompiled_json):
                # Find function name from JSON line without parsing.
                match = JSON_FUNC_NAME_REGEX.search(j)
                assert match is not None
                func_name = match.group(1)
                if func_name not in function_asts:
                    continue
                try:
                    decompiled_data = json.loads(j)
                except json.JSONDecodeError as e:
                    flutes.log(
                        f"{repo_full_name}: Decode error when reading JSON "
                        f"file at {json_path}: {str(e)}", "error")
                    continue
                decompiled_code = decompiled_data["raw_code"]
                # Store the variable names used in the function.
                # We use a random string as the identifier prefix. Sadly, C89
                # (and `pycparser`) doesn't support Unicode.
                for length in range(3, 10 + 1):
                    var_identifier_prefix = "v" + "".join(
                        random.choices(string.ascii_lowercase, k=length))
                    if var_identifier_prefix not in decompiled_code:
                        break
                else:
                    # No way this is happening, right?
                    flutes.log(
                        f"{repo_full_name}: Could not find valid identifier "
                        f"prefix for {func_name} in {code_path} ({sha})",
                        "error")
                    continue
                # (var_id) -> (decompiled_name, original_name)
                variables: Dict[str, Tuple[str, str]] = {}
                for match in DECOMPILED_VAR_REGEX.finditer(decompiled_code):
                    var_id, decompiled_name, original_name = match.groups()
                    var_id = f"{var_identifier_prefix}_{var_id}"
                    if var_id in variables:
                        assert variables[var_id] == (decompiled_name,
                                                     original_name)
                    else:
                        variables[var_id] = (decompiled_name, original_name)
                decompiled_var_names[func_name] = variables
                # Remove irregularities in decompiled code to make it parsable:
                # - Replace `@@VAR` with special identifiers (literally any
                #   identifier that doesn't clash).
                # - Remove the register allocation indication in `var@<rdi>`.
                decompiled_code = DECOMPILED_VAR_REGEX.sub(
                    rf"{var_identifier_prefix}_\1", decompiled_code)
                decompiled_code = DECOMPILED_REG_ALLOC_REGEX.sub(
                    "", decompiled_code)
                if func_name.startswith("_"):
                    # For some reason, Hexrays would chomp off one leading
                    # underscore from function names in their generated code,
                    # which might lead to corrupt code
                    # (`_01inverse` -> `01inverse`). Here we heuristically try
                    # to find and replace the changed function name.
                    decompiled_code = re.sub(
                        # replace all identifiers with matching name
                        r"(?<![a-zA-Z0-9_])" + func_name[1:] +
                        r"(?![a-zA-Z0-9_])",
                        func_name, decompiled_code)
                    # Note that this doesn't fix references of the function in
                    # other functions. But really, why would someone name
                    # their function `_01inverse`?
                decompiled_funcs[func_name] = decompiled_code

            # Generate code replacing original functions with decompiled
            # functions.
            replacer = ghcc.parse.FunctionReplacer(decompiled_funcs)
            replaced_code = replacer.visit(original_ast)

            # Obtain AST for decompiled code by parsing it again.
            code_to_preprocess = DECOMPILED_CODE_HEADER + "\n" + replaced_code
            try:
                code_to_parse = ghcc.parse.preprocess(code_to_preprocess)
            except ghcc.parse.PreprocessError as e:
                msg = (f"{repo_full_name}: GCC return value nonzero for "
                       f"decompiled code of {code_path} ({sha})")
                if len(e.args) > 0:
                    msg += ":\n" + str(e)
                flutes.log(msg, "error")
                has_error = True
                continue

            try:
                decompiled_ast, code_to_parse = \
                    ghcc.parse.parse_decompiled_code(code_to_parse, lexer,
                                                     parser)
                decompiled_tokens = ghcc.parse.convert_to_tokens(
                    code_to_parse, parser.clex.cached_tokens)
            except (ValueError, pycparser.c_parser.ParseError) as e:
                flutes.log(
                    f"{repo_full_name}: Could not parse decompiled code for "
                    f"{code_path} ({sha}): {str(e)}", "error")
                has_error = True
                # We don't have ASTs for decompiled functions, but we can
                # still dump the code. Use the dummy typedefs to extract
                # functions.
                code_lines = code_to_parse.split("\n")
                func_begin_end: Dict[str, List[Optional[int]]] = defaultdict(
                    lambda: [None, None])
                for idx, line in enumerate(code_lines):
                    name, is_begin = replacer.extract_func_name(line)
                    if name is not None:
                        func_begin_end[name][0 if is_begin else 1] = idx
                for func_name, (begin, end) in func_begin_end.items():
                    if (begin is not None and end is not None and
                            func_name in function_asts):
                        decompiled_func_tokens = lexer.lex(
                            "\n".join(code_lines[(begin + 1):end]))
                        original_func_ast = function_asts[func_name]
                        original_ast_json, original_func_tokens = serialize(
                            original_func_ast, original_tokens)
                        matched_func = MatchedFunction(
                            file_path=code_path, binary_hash=sha,
                            func_name=func_name,
                            variable_names=decompiled_var_names[func_name],
                            original_tokens=original_func_tokens,
                            decompiled_tokens=decompiled_func_tokens,
                            original_ast_json=original_ast_json,
                            decompiled_ast_json=None)
                        matched_functions.append(matched_func)
            else:
                # We've successfully parsed decompiled code.
                decompiled_func_asts = extractor.find_functions(decompiled_ast)
                for func_name in decompiled_funcs.keys():
                    original_func_ast = function_asts[func_name]
                    if func_name not in decompiled_func_asts:
                        # Maybe there are other Hexrays-renamed functions that
                        # we didn't fix; just ignore them.
                        continue
                    decompiled_func_ast = decompiled_func_asts[func_name]
                    original_ast_json, original_func_tokens = serialize(
                        original_func_ast, original_tokens)
                    decompiled_ast_json, decompiled_func_tokens = serialize(
                        decompiled_func_ast, decompiled_tokens)
                    matched_func = MatchedFunction(
                        file_path=code_path, binary_hash=sha,
                        func_name=func_name,
                        variable_names=decompiled_var_names[func_name],
                        original_tokens=original_func_tokens,
                        decompiled_tokens=decompiled_func_tokens,
                        original_ast_json=original_ast_json,
                        decompiled_ast_json=decompiled_ast_json)
                    matched_functions.append(matched_func)

    # Cleanup the folders; if errors occurred, keep the preprocessed code.
    status = ("success" if not has_error and len(matched_functions) > 0 else
              ("warning" if not has_error or len(matched_functions) > 0 else
               "error"))
    shutil.rmtree(repo_dir)

    end_time = time.time()
    funcs_without_asts = sum(matched_func.decompiled_ast_json is None
                             for matched_func in matched_functions)
    flutes.log(
        f"[{end_time - start_time:6.2f}s] "
        f"{repo_full_name}: "
        f"Files found: {files_found}/{total_files}, "
        f"functions matched: {len(matched_functions)}/{functions_found} "
        f"({funcs_without_asts} w/o ASTs)", status, force_console=True)

    return Result(repo_owner=repo_info.repo_owner,
                  repo_name=repo_info.repo_name,
                  matched_functions=matched_functions,
                  preprocessed_original_code=preprocessed_original_code,
                  files_found=files_found,
                  functions_found=functions_found,
                  funcs_without_asts=funcs_without_asts)
class ForgivingDeclarationParser:

    def __init__(self, source_code, functions, rename_parameters_file=None):
        self.source_code = source_code
        self.functions = functions
        self.token_stream = self.tokenize(source_code)
        self.previous = None
        self.current = None
        self.current_file = None
        self.chunks_to_erase = []
        self.bracket_stack = []
        self.source_context = []
        self.typedefs_code = ['typedef int __builtin_va_list;']
        self.typedefs = {}
        self.structs_code = []
        self.structs = []
        self.struct_typedefs = []
        self.includes = []
        self.cparser = CParser()
        self.param_names = None

        if rename_parameters_file is not None:
            self.param_names = load_param_names(rename_parameters_file)

        self.func_names = []
        self.func_signatures = []
        self.file_ast = None
        self.mocked_functions = []
        self.parse()

        if self.functions:
            for function in sorted(functions):
                print(f"error: Mocked function '{function}' undeclared. Add "
                      "missing include in the test file.",
                      file=sys.stderr)
            raise Exception(
                'Unable to find declarations of all mocked functions. Add '
                'missing include(s) in the test file.')

    @classmethod
    def tokenize(cls, source_code):
        for match in RE_TOKEN.finditer(source_code):
            if match.lastgroup not in IGNORED_TOKENS:
                yield Token(match.lastgroup,
                            match.group().strip(),
                            match.span())

    def parse(self):
        while self.next():
            if self.current.is_keyword('typedef'):
                self.parse_typedef()

            parsed = self.parse_function_declaration_or_struct()

            if parsed is not None:
                self.func_names.append(parsed[0])
                self.func_signatures.append(parsed[1])
                self.functions.remove(parsed[0])

                if not self.functions:
                    break

            while self.bracket_stack or not self.current.is_punctuation(
                    ';', '}'):
                self.next()

        if self.functions:
            return

        code = '\n'.join(self.typedefs_code + self.structs_code
                         + self.func_signatures)
        self.file_ast = self.cparser.parse(code)
        func_offset = len(self.typedefs_code + self.structs_code)

        for i, func_name in enumerate(self.func_names, func_offset):
            if self.param_names is None:
                func_declaration = self.file_ast.ext[i]
            else:
                func_declaration = rename_parameters(
                    self.file_ast.ext[i],
                    self.param_names.get(func_name))
            self.mocked_functions.append(
                MockedFunction(func_name, func_declaration))

        self.load_typedefs()
        self.load_structs()

    def resolve_type(self, type_):
        if isinstance(type_, c_ast.IdentifierType):
            name = ' '.join(type_.names)
            if name in PRIMITIVE_TYPES or name == '_Bool':
                return PrimitiveType(name)
            elif name == '__builtin_va_list':
                return VaList()
            elif name == 'void':
                return VoidType()
            else:
                return self.resolve_type(self.lookup_typedef(name).type)
        elif isinstance(type_, (c_ast.Union, c_ast.Struct, c_ast.FuncDecl,
                                c_ast.Enum)):
            return type_
        elif isinstance(type_, c_ast.TypeDecl):
            return self.resolve_type(type_.type)
        elif isinstance(type_, c_ast.ArrayDecl):
            if type_.dim is None:
                return self.resolve_type(type_.type)
            else:
                return self.resolve_type(type_.type)
        elif isinstance(type_, c_ast.PtrDecl):
            return self.resolve_type(type_.type)
        else:
            raise Exception(f'Unknown type {type_}.')

    def expand_type(self, type_):
        if isinstance(type_, c_ast.IdentifierType):
            name = ' '.join(type_.names)
            if name in PRIMITIVE_TYPES:
                pass
            elif name in ['__builtin_va_list', 'void', '_Bool']:
                pass
            else:
                type_ = self.expand_type(self.lookup_typedef(name).type)
        elif isinstance(type_, (c_ast.Union, c_ast.Struct, c_ast.FuncDecl,
                                c_ast.Enum)):
            pass
        elif isinstance(type_, c_ast.TypeDecl):
            type_.type = self.expand_type(type_.type)
        elif isinstance(type_, (c_ast.PtrDecl, c_ast.ArrayDecl)):
            type_.type = self.expand_type(type_.type)
        else:
            raise Exception(f'Unknown type {type_}.')

        return type_

    def lookup_typedef(self, name):
        if name in self.typedefs:
            return self.typedefs[name]

    def load_struct_member(self, member):
        items = []
        expanded_type = self.expand_type(member.type)
        type_ = self.resolve_type(member.type)

        if is_fixed_array(expanded_type):
            items.append(['assert-array-eq', member.name])
        elif is_pointer_or_array(expanded_type):
            pass
        elif isinstance(type_, (PrimitiveType, c_ast.Enum)):
            if member.bitsize is None:
                items.append(['assert-eq', member.name])
            else:
                items.append(['assert-eq-bit-field', member.name, type_.name])
        elif is_struct(expanded_type):
            if type_.name is None:
                for item in self.load_struct_members(type_):
                    item[1] = f'{member.name}.{item[1]}'
                    items.append(item)
            else:
                items.append(['assert-struct', member.name, type_.name])

        return items

    def load_struct_members(self, struct):
        if not struct.decls:
            return []

        items = []

        for member in struct.decls:
            if member.name is None:
                continue

            items += self.load_struct_member(member)

        return items

    def load_structs(self):
        for item in self.file_ast:
            expanded_type = self.expand_type(item.type)

            if not is_struct(expanded_type):
                continue

            type_ = self.resolve_type(expanded_type)

            if isinstance(item, c_ast.Typedef):
                if type_.decls is None:
                    continue

                items = self.load_struct_members(type_)
                self.struct_typedefs.append((item.name, items))
            else:
                items = self.load_struct_members(type_)
                self.structs.append((type_.name, items))

    def load_typedefs(self):
        for item in self.file_ast:
            if isinstance(item, c_ast.Typedef):
                self.typedefs[item.name] = item

    def next(self):
        self.previous = self.current
        self.current = next(self.token_stream, None)

        if not self.current:
            return None

        if self.current.type == 'PUNCTUATION':
            if self.current.value in '({[':
                self.bracket_stack.append(')}]'['({['.index(
                    self.current.value)])
            elif self.bracket_stack and self.current.value == self.bracket_stack[-1]:
                self.bracket_stack.pop()
        elif self.current.type == 'LINEMARKER':
            filename, flags = LINEMARKER.match(self.current.value).groups()

            if not flags and len(self.source_context) == 0:
                self.current_file = filename

            if self.current_file not in ['<built-in>', '<command-line>']:
                if '1' in flags:
                    self.source_context.append(filename)

                    if len(self.source_context) == 1:
                        self.includes.append(
                            IncludeDirective.from_source_context(
                                self.source_context))
                elif '2' in flags:
                    self.source_context.pop()

            self.mark_for_erase(*self.current.span)
            self.next()
        elif self.current.is_keyword('__attribute__'):
            begin = self.current.span[0]
            stack_depth = len(self.bracket_stack)
            self.next()

            while len(self.bracket_stack) > stack_depth:
                self.next()

            self.mark_for_erase(begin, self.current.span[1])
        elif self.current.is_keyword('__extension__', '__restrict',
                                     '__signed__', '__signed', '_Nullable'):
            self.mark_for_erase(*self.current.span)
            self.next()
        elif self.current.type == 'PRAGMA':
            self.mark_for_erase(*self.current.span)
            self.next()

        return self.current

    def parse_typedef(self):
        begin = self.current.span[0]

        while self.bracket_stack or not self.current.is_punctuation(';'):
            self.next()

        code = self.read_source_code(begin, self.current.span[1])
        self.typedefs_code.append(code)

    def parse_struct(self, begin, _name):
        while self.bracket_stack:
            self.next()

        code = self.read_source_code(begin, self.current.span[1]) + ';'

        if self.is_in_header_file():
            self.structs_code.append(code)

    def is_in_header_file(self):
        return len(self.source_context) > 0

    def parse_function_declaration_or_struct(self):
        while self.current.is_prefix:
            self.next()

        begin = self.current.span[0]
        return_type = []

        if self.current.is_keyword('struct'):
            if self.next() and self.current.type == 'IDENTIFIER':
                struct_name = self.current.value

                if self.next() and self.current.value == '{':
                    self.parse_struct(begin, struct_name)

            return None

        while (not self.current.is_punctuation('(')
               or self.next() and self.current.is_punctuation('*')):
            if not self.bracket_stack and self.current.is_punctuation(';'):
                return None

            return_type.append(self.current.value)
            self.next()

        if not return_type:
            return None

        func_name = return_type.pop()

        if func_name not in self.functions:
            return None

        while (self.bracket_stack
               or self.next() and self.current.is_punctuation('(')):
            self.next()

        code = self.read_source_code(begin, self.previous.span[1]) + ';'

        return func_name, code

    def mark_for_erase(self, begin, end):
        self.chunks_to_erase.append((begin, end))

    def read_source_code(self, begin, end):
        if self.chunks_to_erase:
            chunks = []
            offset = begin

            for chunk_begin, chunk_end in self.chunks_to_erase:
                if chunk_end < offset:
                    continue

                chunks.append(self.source_code[offset:chunk_begin])
                offset = chunk_end

            chunks.append(self.source_code[offset:end])
            self.chunks_to_erase = []
            code = ''.join(chunks)
        else:
            code = self.source_code[begin:end]

        return code.strip()
parser = CParser()

buf = r'''
static void foo()
{
    char x;

    if ('\x1') {
        x = '\x5';
        x = '\x3';
    }
}
'''

c_ast = parser.parse(buf, 'x.c')
c_ast.show()
print("#######")

v = CASTVisitor()
bytecode = v.visitMain(c_ast.ext[0])
print('\nbytecode: ' + str(bytecode))

labeled_bytecode = addLabels(bytecode)
print('\nlabeled bytecode:')
print_bytecode(labeled_bytecode)

jump_bytecode = addJumps(labeled_bytecode)
print('\njump bytecode:')
print_bytecode(jump_bytecode)
class ForgivingDeclarationParser:
    linemarker = re.compile(r'^# \d+ "((?:\\.|[^\\"])*)"((?: [1234])*)$')
    tokens = {
        "LINEMARKER": r"^#.*$",
        "KEYWORD": (
            "\\b(?:auto|break|case|char|const|continue|default|do|double|else|enum|extern|float"
            "|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch"
            "|typedef|union|unsigned|void|volatile|while|__extension__|__attribute__|__restrict)\\b"
        ),
        "IDENTIFIER": r"\b[a-zA-Z_](?:[a-zA-Z_0-9])*\b",
        "CHARACTER": r"L?'(?:\\.|[^\\'])+'",
        "STRING": r'L?"(?:\\.|[^\\"])*"',
        "INTEGER": r"(?:0[xX][a-fA-F0-9]+|[0-9]+)[uUlL]*",
        "FLOAT": (
            r"(?:[0-9]+[Ee][+-]?[0-9]+|[0-9]*\.[0-9]+(?:[Ee][+-]?[0-9]+)?"
            r"|[0-9]+\.[0-9]*(?:[Ee][+-]?[0-9]+)?)[fFlL]?"
        ),
        "PUNCTUATION": (
            r"\.\.\.|>>=|<<=|\+=|-=|\*=|/=|%=|&=|\^=|\|=|>>|<<|\+\+|--|->|&&|\|\||<=|>=|"
            r"==|!=|;|\{|\}|,|:|=|\(|\)|\[|\]|\.|&|!|~|-|\+|\*|/|%|<|>|\^|\||\?"
        ),
        "SPACE": r"[ \t\v\n\f]*",
        "IGNORE": r".+?",
    }
    ignored_tokens = "SPACE", "IGNORE"
    regex = re.compile(
        "|".join(f"(?P<{token}>{pattern})" for token, pattern in tokens.items()),
        flags=re.MULTILINE,
    )

    def __init__(self, source_code, functions=None, keep_args=""):
        self.source_code = source_code
        self.functions = functions
        self.token_stream = self.tokenize(source_code)
        self.previous = None
        self.current = None
        self.bracket_stack = []
        self.source_context = []
        self.typedefs = ["typedef int __builtin_va_list;"]
        self.cparser = CParser()
        self.keep_args = re.compile(f"^{keep_args}$")

    @classmethod
    def tokenize(cls, source_code):
        for match in cls.regex.finditer(source_code):
            if match.lastgroup not in cls.ignored_tokens:
                yield Token(match.lastgroup, match.group().strip(), match.span())

    def __iter__(self):
        while self.next():
            if self.current.is_keyword("typedef"):
                self.parse_typedef()

            function = self.parse_function_declaration()

            if function is not None:
                yield function

                if self.functions is not None and not self.functions:
                    break

            while self.current and not (
                self.current.is_punctuation(";", "}") and not self.bracket_stack
            ):
                self.next()

    def next(self):
        self.previous = self.current
        self.current = next(self.token_stream, None)

        if not self.current:
            return None

        if self.current.type == "PUNCTUATION":
            if self.current.value in "({[":
                self.bracket_stack.append(")}]"["({[".index(self.current.value)])
            elif self.bracket_stack and self.current.value == self.bracket_stack[-1]:
                self.bracket_stack.pop()
        elif self.current.type == "LINEMARKER":
            filename, flags = self.linemarker.match(self.current.value).groups()

            if "1" in flags:
                self.source_context.append(filename)
            elif "2" in flags:
                self.source_context.pop()
                try:
                    self.source_context[-1] = filename
                except IndexError:
                    self.source_context.append(filename)

            self.erase_code_section(*self.current.span)
            self.next()
        elif self.current.is_keyword("__attribute__"):
            begin = self.current.span[0]
            stack_depth = len(self.bracket_stack)
            self.next()

            while len(self.bracket_stack) > stack_depth:
                self.next()

            self.erase_code_section(begin, self.current.span[1])
        elif self.current.is_keyword("__extension__", "__restrict"):
            self.erase_code_section(*self.current.span)
            self.next()

        return self.current

    def parse_typedef(self):
        start_index = self.current.span[0]

        while self.current and not (
            self.current.is_punctuation(";") and not self.bracket_stack
        ):
            self.next()

        self.typedefs.append(self.source_code[start_index : self.current.span[1]])

    def parse_function_declaration(self):
        if self.bracket_stack:
            return None

        while self.current and self.current.is_prefix:
            self.next()

        start_index = self.current.span[0]
        return_type = []

        while (
            self.current
            and not self.current.is_punctuation("(")
            or self.next()
            and self.current.is_punctuation("*")
        ):
            if not self.bracket_stack and self.current.is_punctuation(";"):
                return None

            return_type.append(self.current.value)
            self.next()

        if not return_type:
            return None

        func_name = return_type.pop()

        if self.functions is not None and func_name not in self.functions:
            return None

        while (
            self.current
            and self.bracket_stack
            or self.next()
            and self.current.is_punctuation("(")
        ):
            self.next()

        signature = self.source_code[start_index : self.previous.span[1]] + ";"
        code = "\n".join(self.typedefs) + "\n" + signature

        try:
            file_ast = self.cparser.parse(code)
        except ParseError:
            return None
        else:
            if self.functions is not None:
                self.functions.remove(func_name)

            return MockedFunction(
                func_name,
                file_ast.ext[-1]
                if self.keep_args.match(func_name)
                else rename_arguments(file_ast.ext[-1]),
                IncludeDirective.from_source_context(self.source_context),
            )

    def erase_code_section(self, begin, end):
        self.source_code = (
            self.source_code[:begin] + " " * (end - begin) + self.source_code[end:]
        )
#!/usr/bin/python

"""A demo showing the usage of the preprocessor with pycparser."""

import argparse
import io

from ppci.api import preprocess
from pycparser.c_parser import CParser

if __name__ == '__main__':
    # Argument handling:
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('source', help='C source file')
    args = arg_parser.parse_args()
    filename = args.source

    # Preprocessing:
    f2 = io.StringIO()
    with open(filename, 'r') as f:
        preprocess(f, f2)
    source = f2.getvalue()

    # Parsing:
    parser = CParser()
    ast = parser.parse(source, filename)
    ast.show()
# The snippet below is the body of a NodeVisitor subclass; the enclosing
# class definition was elided in the original, so a minimal wrapper (with a
# hypothetical name) is reconstructed here.
class ParentTrackingVisitor(c_ast.NodeVisitor):
    current_parent = None

    def visit(self, node):
        method = 'visit_' + node.__class__.__name__
        visitor = getattr(self, method, self.generic_visit)
        return visitor(node)

    def visit_FuncCall(self, node):
        print("Visiting FuncCall")
        node.show()
        print('---- parent ----')
        self.current_parent.show()

    def generic_visit(self, node):
        """ Called if no explicit visitor function exists for a node.
            Implements preorder visiting of the node.
        """
        oldparent = self.current_parent
        self.current_parent = node
        # children() yields (name, node) pairs; visit the node part only.
        for _, c in node.children():
            self.visit(c)
        self.current_parent = oldparent


if __name__ == "__main__":
    source_code = r'''void foo() {
    L"hi" L"there";
}
'''
    parser = CParser()
    ast = parser.parse(source_code, filename='zz')
    ast.show(showcoord=True, attrnames=True, nodenames=True)
def parse_decompiled_code(code: str, lexer: LexerWrapper, parser: CParser,
                          max_type_fix_tries: int = 10) -> Tuple[ASTNode, str]:
    r"""Parse preprocessed decompiled code and heuristically fix errors caused
    by undefined types.

    If a parse error is encountered, we attempt to fix the code by parsing the
    error message and checking whether it could be an undefined type error.
    If it is, we prepend a dummy ``typedef`` and retry parsing, until either
    the code parses or we run out of tries.

    :raises ValueError: When we've run out of tries for fixing types, or the
        issue cannot be resolved by adding a ``typedef`` (i.e., we get the
        same error after adding the ``typedef``).
    :raises pycparser.c_parser.ParseError: When we cannot identify the error.

    :param code: The preprocessed code to parse.
    :param lexer: The lexer to use while parsing.
    :param parser: The parser to use while parsing.
    :param max_type_fix_tries: Maximum retries to fix type errors.
    :return: A tuple containing the parsed AST and the modified code.
    """
    added_types: Set[str] = set()
    code_lines = code.split("\n")
    for _ in range(max_type_fix_tries):
        try:
            decompiled_ast = parser.parse(code)
            break
        except pycparser.c_parser.ParseError as e:
            error_match = PARSE_ERROR_REGEX.match(str(e))
            if error_match is None or not error_match.group("msg").startswith(
                    "before: "):
                raise
            before_token = remove_prefix(error_match.group("msg"), "before: ")
            error_line = code_lines[int(error_match.group("line")) - 1]
            error_pos = int(error_match.group("col")) - 1
            tokens = list(lexer.lex_tokens(error_line))
            try:
                error_token_idx = next(
                    idx for idx, token in enumerate(tokens)
                    if token.lexpos == error_pos and token.value == before_token)
                # There are multiple possible cases here:
                # 1. The type is the first ID-type token before the reported
                #    token (`type token`). It might not be the one immediately
                #    in front (for example, `(type) token`, `type *token`).
                # 2. The type is the token itself. This is rare and only
                #    happens in a situation like:
                #      `int func(const token var)` or `int func(int a, token b)`
                #    Replacing `const` with any combination of type qualifiers
                #    also works.
                if (error_token_idx > 0 and
                        tokens[error_token_idx - 1].type in [
                            "CONST", "VOLATILE", "RESTRICT",
                            "__CONST", "__RESTRICT", "__EXTENSION__", "COMMA"
                        ]):
                    type_token = tokens[error_token_idx]
                else:
                    type_token = next(
                        tokens[idx] for idx in range(error_token_idx - 1, -1, -1)
                        if tokens[idx].type == "ID")
            except StopIteration:
                # If we don't catch this, it would terminate the for-loop in
                # `main()`. Stupid design.
                raise e from None

            if type_token.value in added_types:
                raise ValueError(
                    f"Type {type_token.value} already added "
                    f"(types so far: {list(added_types)})")
            added_types.add(type_token.value)
            typedef_line = f"typedef int {type_token.value};"
            code = typedef_line + "\n" + code
            code_lines.insert(0, typedef_line)
    else:
        raise ValueError(f"Type fixes exceeded limit ({max_type_fix_tries})")
    return decompiled_ast, code
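# A toy illustration of the same retry strategy using plain pycparser; this
# simplified stand-in is not the function above. It parses the error
# location out of the ParseError message and assumes the identifier just
# before the failing token is an unknown type name.
import re
import pycparser
from pycparser.c_parser import CParser as PlainCParser

ERR_LOC = re.compile(r":(\d+):(\d+): before: ")

def parse_with_dummy_typedefs(code, tries=5):
    parser = PlainCParser()
    for _ in range(tries):
        try:
            return parser.parse(code), code
        except pycparser.c_parser.ParseError as e:
            m = ERR_LOC.search(str(e))
            if m is None:
                raise
            line = code.split("\n")[int(m.group(1)) - 1]
            # Identifiers preceding the error column; take the last one.
            ids = re.findall(r"[A-Za-z_]\w*", line[:int(m.group(2)) - 1])
            if not ids:
                raise
            code = f"typedef int {ids[-1]};\n" + code
    raise ValueError("could not fix parse errors by adding typedefs")

ast, fixed_code = parse_with_dummy_typedefs(
    "mytype_t add(mytype_t a, mytype_t b);")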
class Parser(object):
    """A class for parsing C headers to python structs.
    It saves the context and configuration."""

    def __init__(self, conf=gcc_x86_64_le, debuglevel=0):
        super(Parser, self).__init__()
        self.conf = conf
        self.debuglevel = debuglevel
        self.basics = conf.basics
        self.names_to_pycstructs = {}
        self.structs_num = 0
        self.unions_num = 0
        self.arrays_num = 0
        self.enums_num = 0
        self.cdata = ""
        self.last_processed = ""
        funcs = {}
        funcs[pycparser.c_ast.ID] = self.id_handler
        funcs[pycparser.c_ast.IdentifierType] = self.type_handler
        funcs[pycparser.c_ast.Struct] = self.struct_handler
        funcs[pycparser.c_ast.Union] = self.union_handler
        funcs[pycparser.c_ast.Enum] = self.enum_handler
        funcs[pycparser.c_ast.EnumeratorList] = self.enumerator_list_handler
        funcs[pycparser.c_ast.Enumerator] = self.enumerator_handler
        funcs[pycparser.c_ast.ArrayDecl] = self.array_handler
        funcs[pycparser.c_ast.PtrDecl] = self.ptr_handler
        funcs[pycparser.c_ast.Typedef] = self.typedef_handler
        funcs[pycparser.c_ast.Typename] = self.typename_handler
        funcs[pycparser.c_ast.TypeDecl] = self.typedecl_handler
        funcs[pycparser.c_ast.Decl] = self.decl_handler
        funcs[pycparser.c_ast.FuncDecl] = self.func_decl_handler
        funcs[pycparser.c_ast.FuncDef] = self.func_def_handler
        funcs[pycparser.c_ast.Constant] = self.constant_handler
        funcs[pycparser.c_ast.BinaryOp] = self.binary_op_handler
        funcs[pycparser.c_ast.UnaryOp] = self.unary_op_handler
        funcs[pycparser.c_ast.Cast] = self.cast_handler
        self.funcs = funcs
        self.flush()

    def flush(self):
        self.pre = pcpp.Preprocessor()
        self.pre.line_directive = None
        self.cparse = CParser()
        # self.cparse.parse(
        #     """
        #     typedef int uint8_t;
        #     typedef int uint16_t;
        #     typedef int uint32_t;
        #     typedef int uint64_t;
        #     typedef int int8_t;
        #     typedef int int16_t;
        #     typedef int int32_t;
        #     typedef int int64_t;
        #     """, "", 7)
        self.cdata = ""
        self.last_processed = ""

    def __getattr__(self, name):
        if name in self.__dict__ or not self.has_type(name):
            return self.__getattribute__(name)
        return self.get_type(name)

    def has_type(self, val):
        return val in self.names_to_pycstructs or self.conf.has_type(val)

    def get_type(self, val):
        if self.conf.has_type(val):
            return self.conf.get_type(val)
        return self.names_to_pycstructs[val]

    def set_type(self, name, val):
        self.names_to_pycstructs[name] = val
        self.names_to_pycstructs[(name, )] = val
        return val

    def id_handler(self, node):
        assert type(node) is pycparser.c_ast.ID
        return self.get_type(node.name)

    def typedef_handler(self, node):
        assert type(node) is pycparser.c_ast.Typedef
        name = node.name
        val = self.parse_node(node.type)
        if name in ["uint8_t", "uint16_t", "uint32_t", "uint64_t",
                    "int8_t", "int16_t", "int32_t", "int64_t"]:
            return self.get_type(name)
        return self.set_type(name, val)

    def _field_handler(self, node):
        assert type(node) is pycparser.c_ast.Decl
        name = node.name
        typ = self.parse_node(node.type)
        return name, typ

    def enum_handler(self, node):
        assert type(node) == pycparser.c_ast.Enum
        name = node.name
        self.enums_num += 1
        if name is None:
            name = "enum_num_%d" % self.enums_num
        values = self.parse_node(node.values)
        val = MetaPyEnum(name, (), dict(_values=values), self.conf)
        for item in val:
            self.set_type(str(item), item)
        return self.set_type(name, val)

    def enumerator_handler(self, node):
        assert type(node) == pycparser.c_ast.Enumerator
        return self.parse_node(node.value), node.name

    def enumerator_list_handler(self, node):
        assert type(node) == pycparser.c_ast.EnumeratorList
        res = []
        last = -1
        for item in node.enumerators:
            val, name = self.parse_node(item)
            if val is None:
                val = last + 1
            last = val
            self.set_type(name, val)
            res.append((val, name))
        return res

    def struct_handler(self, node):
        assert type(node) == pycparser.c_ast.Struct
        fields = []
        name = node.name
        if not node.decls:
            if self.has_type((name, )):
                return self.get_type((name, ))
            else:
                fields = None
        if fields is not None:
            for decl in node.decls:
                field_name, field_type = self._field_handler(decl)
                fields.append((field_name, field_type))
        self.structs_num += 1
        if name is None:
            name = "struct_num_%d" % self.structs_num
        if self.has_type((name, )) and isinstance(
                type(self.get_type((name, ))), MetaPyStruct):
            val = self.get_type((name, ))
            val.assign_fields(fields, self.conf)
        else:
            val = MetaPyStruct(name, (), {"_fields": fields}, self.conf)
            val.__module__ = None
        self.set_type(name, val)
        return val

    def union_handler(self, node):
        assert type(node) == pycparser.c_ast.Union
        fields = []
        name = node.name
        if not node.decls:
            if self.has_type((name, )):
                return self.get_type((name, ))
            else:
                fields = None
        if fields is not None:
            for decl in node.decls:
                field_name, field_type = self._field_handler(decl)
                fields.append((field_name, field_type))
        self.unions_num += 1
        if name is None:
            name = "union_num_%d" % self.unions_num
        if self.has_type((name, )) and isinstance(
                type(self.get_type((name, ))), MetaPyUnion):
            val = self.get_type((name, ))
            val.assign_fields(fields, self.conf)
        else:
            val = MetaPyUnion(name, (), {"_fields": fields}, self.conf)
            val.__module__ = None
        self.set_type(name, val)
        return val

    def array_handler(self, node):
        assert type(node) is pycparser.c_ast.ArrayDecl
        typ = self.parse_node(node.type)
        num = self.parse_node(node.dim)
        assert num is None or isinstance(num, int)
        self.arrays_num += 1
        val = MetaPyArray("array_num_%d" % self.arrays_num, (),
                          {"_type": typ, "_count": num}, self.conf)
        val.__module__ = None
        return val

    def type_handler(self, node):
        assert type(node) is pycparser.c_ast.IdentifierType
        assert self.has_type(tuple(node.names)), str(tuple(node.names))
        return self.get_type(tuple(node.names))

    def typedecl_handler(self, node):
        assert type(node) is pycparser.c_ast.TypeDecl
        return self.parse_node(node.type)

    def typename_handler(self, node):
        assert type(node) is pycparser.c_ast.Typename
        return self.parse_node(node.type)

    def constant_handler(self, node):
        assert type(node) is pycparser.c_ast.Constant
        if node.type == 'char':
            return ord(eval(node.value))
        if node.type == "int":
            return eval(node.value)
        assert 0, "Unknown constant type: %s" % node.type

    def ptr_handler(self, node):
        assert type(node) is pycparser.c_ast.PtrDecl
        return self.get_type(("void", "*", ))

    def decl_handler(self, node):
        assert type(node) is pycparser.c_ast.Decl
        return self.parse_node(node.type)

    def func_decl_handler(self, node):
        assert type(node) is pycparser.c_ast.FuncDecl
        return

    def func_def_handler(self, node):
        assert type(node) is pycparser.c_ast.FuncDef
        return

    def cast_handler(self, node):
        assert type(node) is pycparser.c_ast.Cast
        val = self.parse_node(node.expr)
        obj = self.parse_node(node.to_type)()
        obj._val_property = val
        return obj._val_property

    def binary_op_handler(self, node):
        assert type(node) is pycparser.c_ast.BinaryOp
        return eval("self.parse_node(node.left) %s self.parse_node(node.right)"
                    % node.op)

    def unary_op_handler(self, node):
        assert type(node) is pycparser.c_ast.UnaryOp
        if node.op == "sizeof":
            return sizeof(self.parse_node(node.expr))
        if node.op == "~":
            return ~self.parse_node(node.expr)
        if node.op == "-":
            return -self.parse_node(node.expr)
        assert False, "Unknown unary op: %s" % node.op

    def parse_node(self, node):
        if node is None:
            return node
        if type(node) in self.funcs:
            return self.funcs[type(node)](node)
        node.show()
        assert 0, "Unknown handler for type: %s" % repr(type(node))

    def parse_string(self, data, file_name="<unknown>",
                     include_dirs=[get_dir(__file__)], debuglevel=None):
        if debuglevel is None:
            debuglevel = self.debuglevel
        for i in include_dirs:
            self.pre.add_path(i)
        self.pre.parse(data)
        buff = io.StringIO()
        self.pre.write(buff)
        processed = buff.getvalue()
        self.last_processed = processed
        not_found = [line for line in processed.splitlines()
                     if "#include" in line]
        if not_found:
            print("There are unresolved includes:")
            for line in not_found:
                print(line)
        assert "#include " not in processed
        for macro_name, macro in self.pre.macros.items():
            if not macro.arglist:
                self.set_type(macro_name,
                              self.pre.evalexpr(macro.value, get_strings=True))
        types = """
        typedef int uint8_t;
        typedef int uint16_t;
        typedef int uint32_t;
        typedef int uint64_t;
        typedef int int8_t;
        typedef int int16_t;
        typedef int int32_t;
        typedef int int64_t;
        """
        contents = self.cparse.parse(types + processed, file_name)
        self.cdata += processed
        res = []
        for ex in contents.ext:
            if debuglevel:
                ex.show()
            res.append(self.parse_node(ex))
        res = res[8:]  # drop the eight helper typedefs prepended above
        return res[0] if len(res) == 1 else res

    def parse_file(self, file_path, include_dirs=None, debuglevel=None):
        if include_dirs is None:
            include_dirs = [get_dir(__file__), get_dir(file_path)]
        with open(file_path, "r") as f:
            data = f.read()
        return self.parse_string(data, file_path, include_dirs, debuglevel)

    def update_globals(self, g):
        """Enters the new classes into globals. Call it like:
            p.update_globals(globals())
        """
        self.conf.update_globals(g)
        g.update([(k, v) for k, v in self.names_to_pycstructs.items()
                  if isinstance(k, str)])
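# A minimal usage sketch for Parser, assuming the default gcc_x86_64_le
# configuration; the header content below is illustrative.
p = Parser()
Header = p.parse_string("struct header { uint32_t magic; uint16_t flags; };")
p.update_globals(globals())  # optionally export the parsed types as globals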