def traverse(c: cindex.Cursor, indent='') -> None:
    """Print one summary line for ``c`` and then visit its children.

    Cursors whose file name differs from ``str(path)`` and cursors whose
    hash is already in ``used`` are ignored. ``path`` and ``used`` are not
    defined in this function; they come from the surrounding scope.
    """
    if c.location.file.name != str(path):
        # The cursor belongs to an included file.
        return
    if c.hash in used:
        # Each cursor is shown only once.
        return
    used.add(c.hash)

    referenced = c.referenced
    if referenced and referenced.hash != c.hash:
        ref = f' => {referenced.hash:#010x}'
    else:
        ref = ''

    canonical_cursor = c.canonical
    if canonical_cursor and canonical_cursor.hash != c.hash:
        canonical = f' => {canonical_cursor.hash:#010x} (forward decl)'
    else:
        canonical = ''

    print(f'{c.hash:#010x}:{indent} {c.kind}: {c.spelling}{ref}{canonical}')

    # An UNEXPOSED_DECL whose first token is `extern` is an extern "C" block;
    # its children are printed with the default (empty) indent.
    is_extern_block = False
    if c.kind == cindex.CursorKind.UNEXPOSED_DECL:
        tokens = list(c.get_tokens())
        is_extern_block = bool(tokens) and tokens[0].spelling == 'extern'

    for child in c.get_children():
        if is_extern_block:
            traverse(child)
        else:
            traverse(child, indent + ' ')
def match_element(parent: Cursor, element: CursorPathElement):
    """Yield the children of ``parent`` selected by ``element``.

    A string element selects every child whose ``displayname`` equals it.
    Any other element is indexed as ``(kind, position)`` and selects the
    ``position``-th child of that kind, if there are enough such children.
    """
    if not isinstance(element, str):
        wanted_kind = element[0]
        position = element[1]
        same_kind = []
        for candidate in parent.get_children():
            if candidate.kind == wanted_kind:
                same_kind.append(candidate)
        if position < len(same_kind):
            yield same_kind[position]
        return

    for candidate in parent.get_children():
        if candidate.displayname == element:
            yield candidate
def traverse(c: cindex.Cursor) -> None:
    """Register the node for ``c`` on its header, descending through UNEXPOSED_DECL.

    ``include``, ``used``, ``kinds``, ``get_or_create_header`` and
    ``get_node`` are taken from the surrounding scope.
    """
    if not c.location.file:
        return

    current = get_or_create_header(c)
    if current.name not in include:
        return

    # Skip cursors that were already processed or whose kind is not wanted.
    if c.hash in used or c.kind not in kinds:
        return

    if c.kind == cindex.CursorKind.UNEXPOSED_DECL:
        # Nothing is recorded for the wrapper itself; only its children.
        for child in c.get_children():
            traverse(child)
        return

    node = get_node(current, c)
    if node:
        used[c.hash] = node
        current.nodes.append(node)
def __init__(self, cursor: cindex.Cursor):
    """Initialise the base class and collect the adapted child cursors.

    Every child of ``cursor`` is passed to ``CursorAdapter.create``; results
    that are not ``None`` are stored in ``self.template_arguments``.
    """
    super().__init__(cursor)
    self.template_arguments = []
    adapted = (CursorAdapter.create(child) for child in cursor.get_children())
    self.template_arguments.extend(
        param for param in adapted if param is not None)
def parseIdentifier(node: Cursor, regex: str = None) -> str:
    """Return the identifier spelled by ``node``, or ``''`` for a GNU null expression.

    C-style casts, one unexposed expression and one leading ``&``/``*``
    unary operator are stripped before the node is inspected.

    Raises:
        ParseError: if the remaining node is not a DECL_REF_EXPR, or if
            ``regex`` is given and does not match the identifier.
    """
    # TODO: Check this
    while node.kind == CursorKind.CSTYLE_CAST_EXPR:
        cast_children = list(node.get_children())
        node = cast_children[1]

    # Descend to child node
    if node.kind == CursorKind.UNEXPOSED_EXPR:
        node = descend(node)

    # Descend to child node when the operator is address-of / dereference
    if node.kind == CursorKind.UNARY_OPERATOR:
        first_token = next(node.get_tokens())
        if first_token.spelling in ('&', '*'):
            node = descend(node)

    # Check for null
    if node.kind == CursorKind.GNU_NULL_EXPR:
        return ''

    # Validate the node
    if node.kind != CursorKind.DECL_REF_EXPR:
        raise ParseError('expected identifier')

    identifier = node.spelling

    # Apply conditional regex constraint
    if not regex or re.match(regex, identifier):
        return identifier
    raise ParseError(f'expected identifier matching {regex}')
def _find_callsites(
        node: Cursor, closure: List[Cursor],
        in_function: bool) -> Tuple[List[Cursor], List[List[Cursor]]]:
    """
    Collect INT3 macro locations and the variable closure at each call site.

    Macro invocations are not aligned with the remaining AST.
    - We need the macro invocation to get the source location where INT3
      was included.
    - To get the position in the AST, we look for a compound statement
      located in our dummy header; this gives us the closure with the
      variables and parameters declared so far.
    We can combine both pieces of information with a simple zip(a, b).

    Returns a tuple ``(include_locations, closures)``: the locations of the
    INT3 macro instantiations, and one list of VAR_DECL/PARM_DECL cursors
    per dummy-header compound statement.
    """
    include_locations = []
    closures = []

    # reset closure outside of functions
    if not in_function:
        closure = []

    if node.kind == CursorKind.MACRO_INSTANTIATION and node.spelling == "INT3":
        include_locations.append(node.location)
    elif in_function:
        if node.kind == CursorKind.COMPOUND_STMT:
            location = node.location
            # A location can exist without a file, so guard both before
            # reading the file name.
            source_file = location.file if location is not None else None
            if (source_file is not None
                    and source_file.name.endswith("int3/dummy.h")):
                # Store a snapshot: `closure` keeps growing while the rest of
                # the function is visited, and declarations that come after
                # this call site must not show up in its closure.
                closures.append(list(closure))
        elif node.kind in (CursorKind.VAR_DECL, CursorKind.PARM_DECL):
            closure.append(node)

    # Children of a function declaration are inside a function.
    in_function = in_function or node.kind == CursorKind.FUNCTION_DECL
    for c in node.get_children():
        include_locations_, closures_ = _find_callsites(
            c, closure, in_function)
        include_locations.extend(include_locations_)
        closures.extend(closures_)
    return include_locations, closures
def create(cursor: cindex.Cursor) -> Optional['ClangInterface']:
    """Build a ``ClangInterface`` from a declaration whose name starts with ``I``.

    Returns ``None`` when the cursor's spelling does not start with ``'I'``
    (including an empty spelling). The base defaults to ``'IUnknown'`` and
    is replaced by the TYPE_REF spelling of a base specifier, with a leading
    ``'struct '`` removed. Methods are collected once per name. The IID is
    taken, best effort, from the quoted GUID in an unexposed attribute.
    """
    name = cursor.spelling
    # startswith() also copes with an empty spelling, where name[0] would
    # raise IndexError.
    if not name.startswith('I'):
        return None

    base = 'IUnknown'
    methods: List[ClangMethod] = []
    iid = ''
    for x in cursor.get_children():
        if x.kind == cindex.CursorKind.CXX_BASE_SPECIFIER:
            ref = next(y for y in x.get_children()
                       if y.kind == cindex.CursorKind.TYPE_REF)
            base = ref.spelling
            if base.startswith('struct '):
                base = base[7:]
        elif x.kind == cindex.CursorKind.CXX_METHOD:
            # Keep only the first method of each name.
            if not any(y.name == x.spelling for y in methods):
                methods.append(ClangMethod(x))
        elif x.kind == cindex.CursorKind.CXX_ACCESS_SPEC_DECL:
            pass
        elif x.kind == cindex.CursorKind.UNEXPOSED_ATTR:
            try:
                # Work on a separate name so a formatting failure cannot
                # leave a half-parsed list in `iid`.
                parts = extract(x).split('"')[1].strip().split('-')
                iid = (
                    f'0x{parts[0]}, 0x{parts[1]}, 0x{parts[2]}, '
                    f'[0x{parts[3][0:2]}, 0x{parts[3][2:4]}, '
                    f'0x{parts[4][0:2]}, 0x{parts[4][2:4]}, '
                    f'0x{parts[4][4:6]}, 0x{parts[4][6:8]}, '
                    f'0x{parts[4][8:10]}, 0x{parts[4][10:12]}]')
            except Exception:
                # Best effort: attributes that do not carry a GUID are
                # ignored. Unlike a bare `except`, this lets
                # KeyboardInterrupt and SystemExit propagate.
                pass
        else:
            print(x.kind)

    return ClangInterface(name, base, methods, iid)
def __subproc_same_func_in_file_check(cursor: cl.Cursor, error_an1, error_an2):
    """Return True when both errors' ``main_line`` fall inside one function.

    Only direct FUNCTION_DECL children of ``cursor`` are inspected. When no
    such function contains both lines the function falls off the end and
    returns ``None``.
    """
    for decl in cursor.get_children():
        if decl.kind != cl.CursorKind.FUNCTION_DECL:
            continue
        first_line = decl.extent.start.line
        last_line = decl.extent.end.line
        if all(first_line <= err.main_line <= last_line
               for err in (error_an1, error_an2)):
            return True
def get_string_literal(c: Cursor) -> Optional[str]:
    """Return the first string literal found in ``c`` or its descendants.

    The subtree is searched depth-first in child order. Returns ``None``
    when it contains no STRING_LITERAL cursor.
    """
    if c.kind == CursorKind.STRING_LITERAL:
        # TODO: This parses python literals which are not the same as C. Ideally this would use clang to parse C
        # literals to bytes.
        return ast.literal_eval(c.spelling)

    for ch in c.get_children():
        found = get_string_literal(ch)
        # Compare against None explicitly: an empty string literal is a
        # valid result. Previously the loop returned after the first child
        # even when nothing was found there, so later siblings were never
        # searched.
        if found is not None:
            return found
    return None
def get_binary_op(cursor: cindex.Cursor) -> str:
    """Return the spelling of the token that follows the first operand.

    The first child's tokens are counted, and the token of ``cursor`` at
    that index is returned. Any exception results in ``''``.
    """
    try:
        operands = list(cursor.get_children())
        lhs_token_count = sum(1 for _ in operands[0].get_tokens())
        all_tokens = list(cursor.get_tokens())
        return all_tokens[lhs_token_count].spelling
    except Exception:
        return ''
def __init__(self, cursor: cindex.Cursor) -> None:
    """Record the name, result type spelling and PARM_DECL children of ``cursor``."""
    self.name = cursor.spelling
    self.result = cursor.result_type.spelling
    self.args: List[ClangNamedType] = []
    self.args.extend(
        ClangNamedType(param.spelling, param.type.spelling)
        for param in cursor.get_children()
        if param.kind == cindex.CursorKind.PARM_DECL)
def _parse(self, c: cindex.Cursor) -> None:
    """Populate fields, methods, base and IID from the children of ``c``.

    - FIELD_DECL, STRUCT_DECL and UNION_DECL children become ``StructNode``
      entries in ``self.fields``.
    - An UNEXPOSED_ATTR starting with one of the known interface macros
      sets ``self.iid``; any other attribute text is printed.
    - CXX_BASE_SPECIFIER sets ``self.base``.
    - CXX_METHOD children without a body are appended to ``self.methods``.
    - Constructors, destructors, conversion functions, access specifiers,
      function templates and using-declarations are ignored.

    Raises:
        Exception: for any other child kind.
    """
    # Macros that wrap the interface GUID as `MACRO("<guid>")`.
    iid_prefixes = (
        'MIDL_INTERFACE("',
        'DX_DECLARE_INTERFACE("',
        'DWRITE_DECLARE_INTERFACE("',
    )
    ignored_kinds = (
        cindex.CursorKind.CONSTRUCTOR,
        cindex.CursorKind.DESTRUCTOR,
        cindex.CursorKind.CONVERSION_FUNCTION,
        cindex.CursorKind.CXX_ACCESS_SPEC_DECL,
        cindex.CursorKind.FUNCTION_TEMPLATE,
        cindex.CursorKind.USING_DECLARATION,
    )

    for child in c.get_children():
        if child.kind == cindex.CursorKind.FIELD_DECL:
            field = StructNode(self.path, child, False)
            # The type's *kind* must be compared with TypeKind; comparing
            # the Type object itself never matched, so the typedef branch
            # was unreachable.
            if child.type.kind == cindex.TypeKind.TYPEDEF:
                type_spelling = get_typedef_type(child).spelling
            else:
                type_spelling = child.type.spelling
            field.field_type = cdeclare.parse_declare(type_spelling)
            self.fields.append(field)
        elif child.kind == cindex.CursorKind.STRUCT_DECL:
            struct = StructNode(self.path, child)
            struct.field_type = 'struct'
            self.fields.append(struct)
        elif child.kind == cindex.CursorKind.UNION_DECL:
            union = StructNode(self.path, child)
            union.field_type = 'union'
            self.fields.append(union)
        elif child.kind == cindex.CursorKind.UNEXPOSED_ATTR:
            value = extract(child)
            for prefix in iid_prefixes:
                if value.startswith(prefix):
                    # Drop the macro prefix and the trailing two characters.
                    self.iid = uuid.UUID(value[len(prefix):-2])
                    break
            else:
                print(value)
        elif child.kind == cindex.CursorKind.CXX_BASE_SPECIFIER:
            # Same Type-vs-TypeKind fix as for fields above.
            if child.type.kind == cindex.TypeKind.TYPEDEF:
                self.base = get_typedef_type(child).spelling
            else:
                self.base = child.type.spelling
        elif child.kind == cindex.CursorKind.CXX_METHOD:
            method = FunctionNode(self.path, child)
            if not method.has_body:
                self.methods.append(method)
        elif child.kind in ignored_kinds:
            pass
        else:
            raise Exception(child.kind)
def parseClass(self, cursor: ci.Cursor) -> parsed.Klass:
    """Build a ``parsed.Klass`` from the CXX_METHOD children of ``cursor``."""
    methods = [
        self.parseMethod(member)
        for member in cursor.get_children()
        if member.kind == ci.CursorKind.CXX_METHOD
    ]
    return parsed.Klass(
        name=cursor.spelling, decl_file=self.file_path, methods=methods)
def get_unary_op(self, cursor: Cursor):
    """Return the source text between the start of ``cursor`` and its first child.

    libclang does not expose the unary operation in the C API, so the
    operator is read from ``self.content`` using the extents' offsets and
    stripped of surrounding whitespace.
    """
    operand = next(cursor.get_children())
    begin = cursor.extent.start.offset
    end = operand.extent.start.offset
    return self.content[begin:end].strip()
def walk_ast(cursor: cindex.Cursor,
             callback: Callable[[cindex.Cursor, T], WalkResult],
             data: T = None):
    """Visit the descendants of ``cursor`` in pre-order, driven by ``callback``.

    ``callback(child, data)`` is called for each visited child:
    - ``WalkResult.BREAK`` stops the whole traversal;
    - ``WalkResult.RECURSE`` descends into that child's children;
    - any other result moves on to the next sibling.

    Previously a BREAK returned from a nested level only ended that level,
    and the walk carried on with the parent's remaining siblings.
    """
    _walk_ast_until_break(cursor, callback, data)


def _walk_ast_until_break(cursor, callback, data):
    """Walk the children of ``cursor``; return True once BREAK was requested."""
    for child in cursor.get_children():
        result = callback(child, data)
        if result == WalkResult.BREAK:
            return True
        if result == WalkResult.RECURSE:
            # Propagate a BREAK raised deeper in the tree to every level.
            if _walk_ast_until_break(child, callback, data):
                return True
    return False
def detect_ast_insertions(cls, before: Cursor, after: Cursor,
                          ast_path: CursorPath) -> Iterable[CursorPath]:
    """Yield the paths of nodes that were inserted between ``before`` and ``after``.

    Each direct child is reduced to a fingerprint (hash of its token kinds
    and spellings). The two fingerprint sequences are diffed as if they
    were lines of a file. An added line that directly follows a removed
    line is treated as a modified child and compared recursively; any other
    added line is reported as an inserted child.
    """
    # Materialise the children once; diff line numbers are 1-based indexes
    # into these lists.
    before_nodes = list(before.get_children())
    after_nodes = list(after.get_children())

    def fingerprint(node):
        tokens = tuple((token.kind.value, token.spelling)
                       for token in node.get_tokens())
        return hex(hash(tokens)) + '\n'

    before_children = [fingerprint(node) for node in before_nodes]
    after_children = [fingerprint(node) for node in after_nodes]

    diff = unidiff.PatchSet.from_string(''.join(
        difflib.unified_diff(before_children,
                             after_children,
                             fromfile='a',
                             tofile='a')))
    if not diff.modified_files:
        return
    assert len(diff.modified_files) == 1
    modified = diff.modified_files[0]

    for hunk in modified:
        for i, line in enumerate(hunk):
            if line.is_context or not line.is_added:
                continue
            after_child = after_nodes[line.target_line_no - 1]
            if i != 0 and hunk[i - 1].is_removed:
                # Removed-then-added pair: the child changed, so look for
                # the insertion inside it.
                before_child = before_nodes[hunk[i - 1].source_line_no - 1]
                yield from cls.detect_ast_insertions(
                    before_child, after_child,
                    ast_path.appended(after, after_child))
            else:
                yield ast_path.appended(after, after_child)
def __init__(self, cursor: cindex.Cursor) -> None:
    """Record the name of ``cursor`` and its FIELD_DECL children.

    Each field is stored as ``ClangNamedType(spelling, type spelling)``.
    Children of any other kind are ignored.
    """
    self.name = cursor.spelling
    self.fields: List[ClangNamedType] = []
    for x in cursor.get_children():
        # The former no-op inner loop and `a = 0` assignments were debugging
        # leftovers with no effect and have been removed.
        if x.kind == cindex.CursorKind.FIELD_DECL:
            self.fields.append(ClangNamedType(x.spelling, x.type.spelling))
def print_field_decl(node: Cursor):
    """
    Print the name of a FIELD_DECL node as JSON when it has no children.

    :param node Cursor: The node to parse.
    """
    if any(True for _ in node.get_children()):
        # Nodes with children produce no output.
        return
    print(json.dumps({'name': node.spelling}))
def traverse(elem: Cursor, depth: int = 0, print_fun=print):
    """Emit the kind of ``elem`` and of all its descendants, one per line.

    Each line is prefixed with one space per nesting level and passed to
    ``print_fun``. ``None`` is emitted as the text ``None``.
    """
    prefix = ' ' * depth
    if elem is None:
        print_fun(prefix + 'None')
        return

    print_fun(prefix + format(elem.kind))
    if not hasattr(elem, 'get_children'):
        return
    for child in elem.get_children():
        traverse(child, depth + 1, print_fun=print_fun)
def parseEnum(self, cursor: ci.Cursor) -> parsed.Enum:
    """Build a ``parsed.Enum`` from the ENUM_CONSTANT_DECL children of ``cursor``.

    Each item carries the constant's spelling, its value and its raw comment
    passed through ``self.trimComment``.
    """
    items = [
        parsed.Enum.Field(constant.spelling, constant.enum_value,
                          self.trimComment(constant.raw_comment))
        for constant in cursor.get_children()
        if constant.kind == ci.CursorKind.ENUM_CONSTANT_DECL
    ]
    return parsed.Enum(
        name=cursor.spelling, decl_file=self.file_path, items=items)
def generate_enum(self, elem: Cursor):
    """Return the generated text for the enum ``elem``.

    Every ENUM_CONSTANT_DECL child contributes a ``name = value`` line. Any
    other child is reported with ``print('ERROR')`` and ``show_elem``.
    """
    name = self.get_name(elem)
    lines = []
    for member in elem.get_children():
        if member.kind != CursorKind.ENUM_CONSTANT_DECL:
            print('ERROR')
            show_elem(member)
            continue
        lines.append(f'{self.get_name(member)} = {member.enum_value}')
    body = '\n'.join(lines)
    return f"\nclass {name}(enum):\n pass\n\n{body}\n"
def get_binary_op(self, binary_cursor: Cursor):
    """Return the source text between the first two children of ``binary_cursor``.

    libclang does not expose the binary operation in the C API. There is a
    patch for that (https://reviews.llvm.org/D10833?id=39158) but it has
    been stuck in code review, so the operator is read from
    ``self.content`` between the end of the left operand and the start of
    the right one, stripped of surrounding whitespace.
    """
    operands = binary_cursor.get_children()
    lhs = next(operands)
    rhs = next(operands)
    begin = lhs.extent.end.offset
    end = rhs.extent.start.offset
    return self.content[begin:end].strip()
def __init__(self, path: pathlib.Path, c: cindex.Cursor) -> None:
    """Collect the enum's constants and derive a name when it has none.

    Every child of ``c`` must be an ENUM_CONSTANT_DECL. When ``self.name``
    is empty after the base initialiser ran, the name is reduced from the
    constants' names with ``get_common_start``, printed, and stored.

    Raises:
        Exception: if a child is not an ENUM_CONSTANT_DECL.
    """
    super().__init__(path, c)
    self.values: List[EnumValue] = []
    for child in c.get_children():
        if child.kind != cindex.CursorKind.ENUM_CONSTANT_DECL:
            raise Exception(child.kind)
        self.values.append(EnumValue(child.spelling, child.enum_value))

    # Only derive a name when there is at least one constant; an unnamed
    # enum without enumerators used to fail with IndexError here.
    if not self.name and self.values:
        name = self.values[0].name
        for v in self.values[1:]:
            name = get_common_start(name, v.name)
        print(name)
        self.name = name
def get_type_from_hash(self, t: cindex.Type, c: cindex.Cursor) -> TypeRef:
    '''
    Get an already registered type from its hash.

    For ELABORATED, RECORD, TYPEDEF and ENUM types the children of ``c`` are
    scanned: the first STRUCT_DECL/UNION_DECL or TYPE_REF child decides the
    result, while attribute children are skipped. A referenced declaration
    that is not registered yet is parsed first. FUNCTIONPROTO types map to
    a ``Void`` reference.

    Raises:
        Exception: when no registered declaration can be found.
    '''
    if t.kind in (cindex.TypeKind.ELABORATED, cindex.TypeKind.RECORD,
                  cindex.TypeKind.TYPEDEF, cindex.TypeKind.ENUM):
        # struct and the like
        for child in c.get_children():
            if child.kind in (cindex.CursorKind.STRUCT_DECL,
                              cindex.CursorKind.UNION_DECL):
                decl = self.get(child)
                if decl:
                    return TypeRef(decl, t.is_const_qualified())
                raise Exception(
                    f'declaration is not registered: {child.spelling}')
            elif child.kind == cindex.CursorKind.TYPE_REF:
                if not self.has(child.referenced):
                    self.parse_cursor(child.referenced)
                decl = self.get(child.referenced)
                if decl:
                    return TypeRef(decl, t.is_const_qualified())
                raise Exception(
                    f'referenced type is not registered: {child.spelling}')
            elif child.kind in (cindex.CursorKind.UNEXPOSED_ATTR,
                                cindex.CursorKind.DLLIMPORT_ATTR):
                pass
            else:
                raise Exception(f'unexpected child kind: {child.kind}')
        raise Exception(f'no type declaration found for: {c.spelling}')

    if t.kind == cindex.TypeKind.FUNCTIONPROTO:
        return TypeRef(cpptypeinfo.Void(), t.is_const_qualified())

    raise Exception(f'unsupported type kind: {t.kind}')
def find_node(node: Cursor, line: int) -> Union[Cursor, bool]:
    """
    Search ``node`` and its descendants, depth-first, for a given line number.

    :param node clang.cindex.Cursor: The node itself.
    :param line int: The line number where the node is located at.
    :rtype clang.cindex.Cursor/bool: The found node or False otherwise.
    """
    if node.location.line == line:
        return node
    results = (find_node(child, line) for child in node.get_children())
    return next((found for found in results if found), False)
def parse_enum(self, c: cindex.Cursor) -> Enum:
    """Create, register and return an ``Enum`` for the cursor ``c``.

    The enum is named after ``c.type.spelling``, registered in the parser's
    current namespace, stamped with the cursor's file and line, and added
    via ``self.add``.

    Raises:
        Exception: if the enum has no name or a child is not an
            ENUM_CONSTANT_DECL.
    """
    name = c.type.spelling
    if not name:
        raise Exception('no name')

    values = []
    for member in c.get_children():
        if member.kind != cindex.CursorKind.ENUM_CONSTANT_DECL:
            raise Exception(f'{member.kind}')
        values.append(EnumValue(member.spelling, member.enum_value))

    decl = Enum(name, values)
    namespace = self.parser.get_current_namespace()
    namespace.register_type(name, decl)
    decl.file = pathlib.Path(c.location.file.name)
    decl.line = c.location.line
    self.add(c, decl)
    return decl
def parse_functionproto(self, c: cindex.Cursor) -> Function:
    """Build a ``Function`` from the children of ``c``.

    A TYPE_REF child sets the result type (default ``Void``); every
    PARM_DECL child becomes a ``Param``.
    """
    children = list(c.get_children())
    params = []
    result = cpptypeinfo.Void()
    for child in children:
        kind = child.kind
        if kind == cindex.CursorKind.TYPE_REF:
            result = self.get(child.referenced)
        elif kind == cindex.CursorKind.PARM_DECL:
            decl = self.cindex_type_to_cpptypeinfo(child.type, child)
            ref = TypeRef(decl, child.type.is_const_qualified())
            params.append(Param(child.spelling, ref))
    return Function(result, params)
def parse_FUNCTION_DECL(cursor: Cursor) -> ast.Node:
    """Convert a function declaration cursor into an ``ast.Function`` node.

    PARM_DECL children become the argument declarations; the return type
    and variadic flag are read from ``cursor.type``.
    """
    args: List[ast.VariableDeclaration] = []
    for param in cursor.get_children():
        if param.kind != CursorKind.PARM_DECL:
            continue
        typ = type_to_ehlit(param.type)
        assert isinstance(typ, ast.Symbol)
        declaration = ast.VariableDeclaration(
            typ, ast.Identifier(0, param.spelling))
        args.append(declaration)

    ret_type = type_to_ehlit(cursor.type.get_result())
    assert isinstance(ret_type, ast.Symbol)

    signature = ast.FunctionType(
        ret_type, args, cursor.type.is_function_variadic())
    templated = ast.TemplatedIdentifier(0, '@func', [signature])
    return ast.Function(0, ast.Qualifier.NONE, templated,
                        ast.Identifier(0, cursor.spelling))
def _traverse_namespace(self, cursor: Cursor,
                        namespaces: List[Text]) -> None:
    """Dispatches the children of a namespace cursor to their handlers.

    Classes and structs go to ``_traverse_class``, function declarations
    with a FUNCTIONPROTO type go to ``_traverse_function``, and nested
    namespaces are traversed recursively with their name appended.

    Args:
      cursor: The cursor which is pointing to the head of the namespace.
      namespaces: The parent namespace in which the current namespace is in.
    """
    for member in cursor.get_children():
        kind = member.kind
        if kind in (CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL):
            self._traverse_class(member, namespaces)
        elif (kind == CursorKind.FUNCTION_DECL
              and member.type.kind == TypeKind.FUNCTIONPROTO):
            self._traverse_function(member, namespaces)
        elif kind == CursorKind.NAMESPACE:
            # Pass a fresh list so the caller's list is not modified.
            self._traverse_namespace(member, [*namespaces, member.spelling])
def typedef_elaborated_type(self, underlying: cindex.Type,
                            c: cindex.Cursor) -> Optional[TypeRef]:
    '''
    Type definition that comes together with a typedef (struct, enum, ...).

    Returns ``None`` unless ``underlying`` is an ELABORATED type. Otherwise
    only the first child of ``c`` is inspected: a struct/union/enum
    declaration is looked up directly and a TYPE_REF through its referenced
    cursor.

    Raises:
        Exception: if there is no child, the first child has another kind,
            or the lookup via ``self.get`` yields nothing.
    '''
    if underlying.kind != cindex.TypeKind.ELABORATED:
        return None

    children = list(c.get_children())
    if not children:
        raise Exception()

    first = children[0]
    if first.kind in [
            cindex.CursorKind.STRUCT_DECL,
            cindex.CursorKind.UNION_DECL,
    ] or first.kind == cindex.CursorKind.ENUM_DECL:
        target = first
    elif first.kind == cindex.CursorKind.TYPE_REF:
        target = first.referenced
    else:
        raise Exception()

    resolved = self.get(target)
    if not resolved:
        raise Exception()
    return TypeRef(resolved, underlying.is_const_qualified())
def find_child(parent: Cursor, kind: CursorKind, spelling: str):
    """Return the first direct child with the given kind and spelling, else ``None``."""
    matches = (node for node in parent.get_children()
               if kind == node.kind and spelling == node.spelling)
    return next(matches, None)
def print_all_of_kind(parent: Cursor, kind: CursorKind):
    """Call ``debug_cursor`` on every descendant of ``parent`` whose kind is ``kind``.

    The whole subtree is walked; descendants of non-matching nodes are
    still visited.
    """
    for node in parent.get_children():
        # The kind filter had been commented out, so `kind` was ignored and
        # every node was printed.
        if kind == node.kind:
            debug_cursor(node)
        print_all_of_kind(node, kind)