def get_context(doc, segment, linear_value):
    """Collect the context nodes that precede ``segment``.

    Three cases are distinguished, using the ``domutil`` predicates:

    * ``linear_value`` does not reach beyond the parent: the previous AST
      elements of the first segment node are taken and the segment itself
      is appended.
    * it reaches beyond the parent but not beyond the method: the search
      restarts from the parent AST element with a reduced ``linear_value``.
    * it reaches beyond the method: the first ``<call>`` in ``doc`` whose
      name equals the enclosing function's name is used as the new starting
      point, and the function node is recorded under that name.

    :param doc: DOM document that is searched for ``<call>`` elements
    :param segment: non-empty list of DOM nodes; only ``segment[0]`` is used
        as the starting node
    :param linear_value: remaining search budget, reduced on every climb
    :return (context, functions): ``context`` is a list of nodes and
        ``functions`` a dict mapping a called name to its function node.
        Both are empty when no matching call site is found.
    """
    node = segment[0]
    context = []
    functions = {}
    # The debug flag is compared against the string 'true' everywhere, so a
    # configured 'false' does not count as enabled.
    debug = config.get('show_context_search_debug') == 'true'

    if not domutil.beyond_parent(node, linear_value):
        context = domutil.get_previous_AST_elements(node, linear_value)
        context.extend(segment)
    elif not domutil.beyond_method(node, linear_value):
        linear_value -= domutil.get_previous_AST_element_number(node) + 1
        node = domutil.get_parent_AST_element(node)
        context, functions = get_context(doc, [node], linear_value)
    else:
        function_node = domutil.get_function_element(node)
        name_node = domutil.get_first_child_by_tagname(function_node, 'name')
        name = domutil.get_text_content(name_node)
        for call_node in doc.getElementsByTagName('call'):
            call_name_node = domutil.get_first_child_by_tagname(call_node, 'name')
            call_name = domutil.get_text_content(call_name_node)
            if call_name != name:
                continue
            if debug:
                blue('beyound method '+name + ' and found call node: ')
            linear_value -= domutil.get_previous_AST_element_number_until_function(node) + 1
            context, functions = get_context(doc, [call_node], linear_value)
            functions[call_name] = function_node
            # only the first matching call site is followed
            return context, functions
        if debug:
            yellow('beyound method '+name+' but did not found call node: ')
    return context, functions
def parse_function(node):
    """Split a <function> element into return type, name and parameters.

    :param node: DOM element whose tag is ``function``
    :return (type_name, function_name, params), where ``params`` is the
        result of parse_parameter_list on the <parameter_list> child
    """
    assert domutil.is_element(node) and node.tagName == 'function'
    first_child = domutil.get_first_child_by_tagname
    return_type_node = first_child(node, 'type')
    function_name_node = first_child(node, 'name')
    params_node = first_child(node, 'parameter_list')
    return (
        domutil.get_text_content(return_type_node),
        domutil.get_text_content(function_name_node),
        parse_parameter_list(params_node),
    )
def get_struct_alias(code):
    """Return the alias declared by the first ``typedef struct`` in ``code``.

    :param code: source text passed to domutil.get_doc_from_code
    :return the text of the first typedef's <name> child; None when there is
        no <typedef>, or when its <type> has no <struct> child (a warning is
        logged in that case)
    """
    doc = domutil.get_doc_from_code(code)
    typedef_nodes = doc.getElementsByTagName('typedef')
    if not typedef_nodes:
        return None

    # expected shape: typedef struct xxx {} name;
    first_typedef = typedef_nodes[0]
    type_node = domutil.get_first_child_by_tagname(first_typedef, 'type')
    struct_node = domutil.get_first_child_by_tagname(type_node, 'struct')
    if not struct_node:
        logger.warning('it is not a structure')
        return None

    alias_node = domutil.get_first_child_by_tagname(first_typedef, 'name')
    return domutil.get_text_content(alias_node)
def _collect_public_names(nodes, find_name_node):
    """Return the non-empty names of ``nodes`` that do not start with '_'.

    :param nodes: iterable of DOM elements
    :param find_name_node: callable mapping an element to its name node
    :return a set of name strings
    """
    names = set()
    for node in nodes:
        name = domutil.get_text_content(find_name_node(node))
        if name and not name.startswith('_'):
            names.add(name)
    return names


def parse_recursive(include_path):
    """Parse a header file and, transitively, every header it includes.

    Each header is resolved with include_path_to_full_path, converted to XML
    by srcmlutil and scanned for typedef, struct, #define and function
    declaration names. Names that are empty or start with an underscore are
    ignored. Headers that cannot be resolved are logged and skipped.

    :param include_path: header name as written in an include, e.g. stdio.h
    :return a set containing all collected typedef/struct, define and
        function names
    """
    logger.info('parsing: '+include_path)
    typedefs = set()
    defines = set()
    functions = set()
    pending = {include_path}
    parsed = set()

    def direct_name(node):
        return domutil.get_first_child_by_tagname(node, 'name')

    def macro_name(node):
        return domutil.get_first_child_by_tagnames(node, 'cpp:macro', 'name')

    while pending:
        current = pending.pop()
        parsed.add(current)
        full_path = include_path_to_full_path(current)
        if not full_path:
            logger.warning('this header does not exists: '+current)
            continue
        doc = parseString(srcmlutil.get_xml_from_file(full_path))

        # struct names are reported together with typedef names
        typedefs |= _collect_public_names(doc.getElementsByTagName('typedef'), direct_name)
        typedefs |= _collect_public_names(doc.getElementsByTagName('struct'), direct_name)
        defines |= _collect_public_names(doc.getElementsByTagName('cpp:define'), macro_name)
        functions |= _collect_public_names(doc.getElementsByTagName('function_decl'), direct_name)

        for include_node in doc.getElementsByTagName('cpp:include'):
            file_node = domutil.get_first_child_by_tagname(include_node, 'cpp:file')
            # drop the surrounding delimiters, e.g. "<stdio.h>" -> "stdio.h"
            name = domutil.get_text_content(file_node)[1:-1]
            # An include without a usable file name yields '', which is not a
            # header path and must not be queued.
            if name and name not in parsed and name not in pending:
                pending.add(name)
    return typedefs | defines | functions
def _split_multi_decl(line):
    """Return ``line`` rewritten as one declaration per variable, or None.

    A line is rewritten only when it ends with ';', contains ',', contains no
    parentheses, parses to a <decl_stmt> with more than one <decl>, and
    get_type yields a non-empty type for it.
    """
    if not line.strip().endswith(';') or ',' not in line:
        return None
    # FIXME job *job, *sj = deserializeJob(nextjob,remlen,&nextjob,SER_MESSAGE);
    # Lines containing parentheses are never rewritten, so skip them before
    # paying for a parse.
    if '(' in line or ')' in line:
        return None
    doc = domutil.get_doc_from_code(line)
    unit_node = doc.getElementsByTagName('unit').item(0)
    decl_stmt_node = domutil.get_first_child_by_tagname(unit_node, 'decl_stmt')
    if not (domutil.is_element(decl_stmt_node) and decl_stmt_node.tagName == 'decl_stmt'):
        return None
    if len(domutil.get_children_by_tagname(decl_stmt_node, 'decl')) <= 1:
        return None
    full_type = get_type(line)
    if not full_type:
        return None
    variables = line.split(',')
    # the first piece already carries the type; the rest get it prepended
    return variables[0] + ';\n' + ';\n'.join(full_type + ' ' + var for var in variables[1:])


def func(directory):
    """Split multi-variable declarations in every .c file under ``directory``.

    Each file is read line by line, qualifying lines are replaced by one
    declaration per variable, and the file is then overwritten in place. The
    file name and the number of rewritten lines are printed.

    :param directory: root directory that is walked recursively
    """
    for root, _, files in os.walk(directory):
        for basename in files:
            if not basename.endswith('.c'):
                continue
            filename = os.path.join(root, basename)
            print(filename)
            change_count = 0
            output_lines = []
            with open(filename) as source:
                for line in source:
                    replacement = _split_multi_decl(line)
                    if replacement is not None:
                        # count only lines that were really rewritten
                        change_count += 1
                        line = replacement
                    output_lines.append(line)
            print('made '+str(change_count)+' changes to file: ' + filename)
            with open(filename, 'w') as target:
                target.writelines(output_lines)
def parse_decl(node):
    """Read the type and variable name out of a <decl> element.

    Only one variable per declaration is expected (the decl splitter
    preprocessor is relied on for that). When the name text carries an array
    suffix, everything from the first '[' on is moved from the name to the
    end of the type.

    :param node: DOM element whose tag is ``decl``
    :return (type, name, init); ``init`` is always None for now
    """
    assert domutil.is_element(node) and node.tagName == 'decl'
    type_name = domutil.get_text_content(
        domutil.get_first_child_by_tagname(node, 'type'))
    var_name = domutil.get_text_content(
        domutil.get_first_child_by_tagname(node, 'name'))
    if '[' in var_name:
        cut = var_name.index('[')
        var_name, type_name = var_name[:cut], type_name + var_name[cut:]
    # TODO init
    return (type_name, var_name, None)
def parse_decl_stmt(node):
    """Parse the first <decl> child of a <decl_stmt> element.

    Only one variable per statement is expected (the decl splitter
    preprocessor is relied on for that).

    :param node: DOM element whose tag is ``decl_stmt``
    :return (type, name, init) as produced by parse_decl
    """
    assert domutil.is_element(node) and node.tagName == 'decl_stmt'
    return parse_decl(domutil.get_first_child_by_tagname(node, 'decl'))
def parse_struct(node):
    """Parse a <struct> element into its name and field list.

    Only <decl_stmt> children of the struct's <block> are collected;
    anonymous inner enums, unions or structs are not handled.

    :param node: DOM element whose tag is ``struct``
    :return (name, [(type1, field1), (type2, field2), ...]); ``name`` is ''
        when the struct has no <name> child
    """
    assert domutil.is_element(node) and node.tagName == 'struct'
    name_node = domutil.get_first_child_by_tagname(node, 'name')
    block_node = domutil.get_first_child_by_tagname(node, 'block')
    name = domutil.get_text_content(name_node) if name_node else ''
    fields = [
        parse_decl_stmt(stmt)[0:2]
        for stmt in domutil.get_children_by_tagname(block_node, 'decl_stmt')
    ]
    return (name, fields)
def get_undefined_vars(nodes, resolved):
    """Collect names that are used in ``nodes`` without being defined.

    The nodes are walked in order and recursively. Declarations, for-loop
    init variables and parameter names are added to ``resolved`` (the set is
    mutated in place); names found in expressions or in #define values that
    are not resolved are reported.

    :param nodes: a list of dom nodes
    :param resolved: a set of names (strings) assumed to be defined
    :return a set of undefined names
    """
    undefined = set()
    conditional_tags = ('cpp:ifdef', 'cpp:elif', 'cpp:ifndef')
    for node in nodes:
        if not domutil.is_element(node):
            continue
        tag = node.tagName
        if tag == 'decl_stmt':
            resolved.add(syntaxutil.parse_decl_stmt(node)[1])
        elif tag == 'expr':
            # inside #ifdef-like directives there may be `#elif defined(__sun)`
            if any(domutil.in_node(node, t, level=2) for t in conditional_tags):
                continue
            for name in syntaxutil.parse_expr(node):
                # skip names such as uint8_t, false, true, NULL
                if sys.resolve_single(name):
                    continue
                if name not in resolved:
                    undefined.add(name)
        elif tag == 'for':
            init_node = domutil.get_first_child_by_tagname(node, 'init')
            if init_node:
                loop_var = syntaxutil.parse_for_init(init_node)[1]
                if loop_var:
                    resolved.add(loop_var)
        elif tag == 'parameter_list':
            for _, param_name in syntaxutil.parse_parameter_list(node):
                resolved.add(param_name)
        elif tag == 'cpp:define':
            value_node = domutil.get_first_child_by_tagname(node, 'cpp:value')
            value_text = domutil.get_text_content(value_node)
            for token in re.findall(r'([A-Z_]{2,})', value_text):
                if token not in resolved:
                    undefined.add(token)
        # children get a copy, so their declarations stay local to them
        undefined.update(get_undefined_vars(node.childNodes, resolved | undefined))
    return undefined
def extract_calls(node):
    """List the name of every <call> element found under ``node``.

    :param node: DOM node that supports getElementsByTagName
    :return a list of call names as strings, in document order
    """
    return [
        domutil.get_text_content(
            domutil.get_first_child_by_tagname(call_node, 'name'))
        for call_node in node.getElementsByTagName('call')
    ]
def get_struct_name(code):
    """Return the name of the first <struct> found in ``code``.

    :param code: source text passed to domutil.get_doc_from_code
    :return the text of the first struct's <name> child, or None when the
        code contains no <struct>
    """
    struct_nodes = domutil.get_doc_from_code(code).getElementsByTagName('struct')
    if not struct_nodes:
        return None
    name_node = domutil.get_first_child_by_tagname(struct_nodes[0], 'name')
    return domutil.get_text_content(name_node)
def extract_to_resolve(node, resolved):
    """Gather every name under ``node`` that still has to be resolved.

    Collected are: called function names, type names, names produced by
    syntaxutil for #define bodies and type casts, every word of a
    <cpp:value>, <enum> names and the undefined variables reported by
    io.get_undefined_vars.

    :param node: dom node that needs to be resolved
    :param resolved: set of already resolved names; passed on to
        io.get_undefined_vars and subtracted from the result
    :return a set of names
    """
    functions = set()
    types = set()
    unknown = set()
    conditional_tags = ('cpp:ifdef', 'cpp:elif', 'cpp:ifndef')

    def name_of(element):
        return domutil.get_text_content(
            domutil.get_first_child_by_tagname(element, 'name'))

    for call_node in node.getElementsByTagName('call'):
        # inside #ifdef-like directives there may be `#elif defined(__sun)`
        if any(domutil.in_node(call_node, t, level=2) for t in conditional_tags):
            continue
        functions.add(name_of(call_node))

    for type_node in node.getElementsByTagName('type'):
        if domutil.in_node(type_node, 'cpp:define', level=4):
            continue
        types.add(name_of(type_node))

    for value_node in node.getElementsByTagName('cpp:value'):
        value = domutil.get_text_content(value_node)
        types |= syntaxutil.parse_type_cast(value)
        # every single word of the value is a candidate as well
        unknown.update(re.findall(r'\b\w+\b', value))

    for define_node in node.getElementsByTagName('cpp:define'):
        functions |= syntaxutil.parse_cpp_define(define_node)

    # When a function's return type is an enum, the function is not marked as
    # <function>; a genuine enum is dealt with in resolver/localfunc.py.
    for enum_node in node.getElementsByTagName('enum'):
        functions.add(name_of(enum_node))

    variables = io.get_undefined_vars([node], resolved)

    for names in (functions, types, variables):
        names.discard('')
    return (functions | types | variables | unknown) - resolved
def parse_typedef(node):
    """Parse a <typedef> element into (alias, original type).

    Examples of the expected markup:

    <typedef>typedef <type>struct <name>A</name> *</type> <name>hello_t</name>;</typedef>

    <typedef>typedef <type><struct>struct <name>_stritem</name> <block> ... </struct></type> <name>item</name>;</typedef>

    A typedef without a <type> child is treated as wrapping a
    <function_decl>, i.e. ``typedef void *func(int a, int b)``; its result
    is ``(func, '')``.

    :param node: DOM element whose tag is ``typedef``
    :return (alias, original), both stripped of surrounding whitespace
    """
    assert domutil.is_element(node) and node.tagName == 'typedef'
    type_node = domutil.get_first_child_by_tagname(node, 'type')
    if type_node:
        alias_node = domutil.get_first_child_by_tagname(node, 'name')
        # the struct body (<block>) is left out of the original type text
        original = domutil.get_text_content_except(type_node, 'block')
        alias = domutil.get_text_content(alias_node)
        return (alias.strip(), original.strip())

    # no <type>: take the alias from the wrapped <function_decl>
    decl_node = domutil.get_first_child_by_tagname(node, 'function_decl')
    alias_node = domutil.get_first_child_by_tagname(decl_node, 'name')
    return (domutil.get_text_content(alias_node).strip(), '')
def parse_parameter_list(node):
    """Parse the <parameter_list> element of a function.

    <param> children without a <decl> child are skipped.

    :param node: DOM element whose tag is ``parameter_list``
    :return a list [(type1, var1), (type2, var2), ...]
    """
    assert domutil.is_element(node) and node.tagName == 'parameter_list'
    decl_nodes = (
        domutil.get_first_child_by_tagname(param_node, 'decl')
        for param_node in domutil.get_children_by_tagname(node, 'param')
    )
    # parse_decl yields (type, name, init); only the first two are kept
    return [parse_decl(decl_node)[:2] for decl_node in decl_nodes if decl_node]
def parse_for_init(node):
    """Parse the <init> part of a for statement.

    Only one variable is expected in the declaration.

    :param node: DOM element whose tag is ``init``
    :return (type, var), or (None, None) when the init has no <decl> child
    """
    assert domutil.is_element(node) and node.tagName == 'init'
    decl_node = domutil.get_first_child_by_tagname(node, 'decl')
    if not decl_node:
        return (None, None)
    declared_type, declared_name, _ = parse_decl(decl_node)
    return (declared_type, declared_name)
def parse_cpp_define(node):
    """Find the names used by the body of a #define (<cpp:define>).

    The text of the <cpp:value> child is parsed as code, and the names of
    all <call> and <macro> elements in it are collected.

    :param node: a <cpp:define> DOM element
    :return a set of function names to resolve
    """
    value_node = domutil.get_first_child_by_tagname(node, 'cpp:value')
    value_doc = domutil.get_doc_from_code(domutil.get_text_content(value_node))
    to_resolve = set()
    # A body such as `emitf(__LINE__, "\t" __VA_ARGS__` is reported as a
    # <macro> rather than a <call>, so both tags are inspected.
    for tag in ('call', 'macro'):
        for found in value_doc.getElementsByTagName(tag):
            found_name = domutil.get_first_child_by_tagname(found, 'name')
            to_resolve.add(domutil.get_text_content(found_name))
    return to_resolve
def resolve_undefined_vars(nodes, resolved):
    """Find undefined variables in ``nodes`` and resolve their types.

    The nodes are walked in order and recursively. Declared variables and
    for-loop init variables are added to ``resolved`` (the set is mutated in
    place). For each undefined name in an expression, resolve_local_type is
    asked for a type; a warning is logged when none is found and the name is
    not a global variable.

    :param nodes: a list of dom nodes
    :param resolved: a set of names (strings) assumed to be defined
    :return {var: type, ...}
    """
    found = {}
    conditional_tags = ('cpp:ifdef', 'cpp:elif', 'cpp:ifndef')
    for node in nodes:
        if not domutil.is_element(node):
            continue
        tag = node.tagName
        if tag == 'decl_stmt':
            resolved.add(syntaxutil.parse_decl_stmt(node)[1])
        elif tag == 'expr':
            # inside #ifdef-like directives there may be `#elif defined(__sun)`
            if any(domutil.in_node(node, t, level=2) for t in conditional_tags):
                continue
            for name in syntaxutil.parse_expr(node):
                # skip names such as uint8_t, false, true, NULL
                if sys.resolve_single(name):
                    continue
                if name in resolved or name in found:
                    continue
                # an undefined variable: try to find its local type
                type_name = resolve_local_type(name, node)
                if type_name:
                    found[name] = type_name
                elif not local.check_global_variable(name):
                    # global variables are not worth a warning
                    logger.warning('undefind variable ' + name + ' not resovled for local type.')
        elif tag == 'for':
            init_node = domutil.get_first_child_by_tagname(node, 'init')
            if init_node:
                loop_var = syntaxutil.parse_for_init(init_node)[1]
                if loop_var:
                    resolved.add(loop_var)
        # children get a copy, so their declarations stay local to them
        found.update(resolve_undefined_vars(node.childNodes, resolved | found.keys()))
    return found
def instrument_segment(doc, segment):
    """Surround ``segment`` with ``//@Pre`` and ``//@Post`` comment elements.

    A <comment> element is inserted before the first segment node and after
    the last one, and both are also added to ``segment`` (the list is mutated
    in place). When the ``instrument_loop`` config value is ``'true'``, an
    ``//@Inner`` comment is additionally inserted before the last child of
    the <block> of every ``for``/``while`` element in the segment.

    :param doc: DOM document used to create the new nodes
    :param segment: non-empty list of sibling DOM nodes
    :return the same ``segment`` list, now starting with the pre comment and
        ending with the post comment
    """
    first_node = segment[0]
    last_node = segment[-1]
    parent = first_node.parentNode

    pre = doc.createElement('comment')
    pre.appendChild(doc.createTextNode('\n//@Pre\n'))
    parent.insertBefore(pre, first_node)

    post = doc.createElement('comment')
    post.appendChild(doc.createTextNode('\n//@Post\n'))
    # a nextSibling of None makes insertBefore append at the end
    parent.insertBefore(post, last_node.nextSibling)

    segment.insert(0, pre)
    segment.append(post)

    # loop invariant
    if config.get('instrument_loop') == 'true':
        for node in segment:
            # The element check must guard both tag comparisons: non-element
            # nodes (e.g. text) have no tagName attribute.
            if not (domutil.is_element(node) and node.tagName in ('for', 'while')):
                continue
            block_node = domutil.get_first_child_by_tagname(node, 'block')
            if not block_node:
                continue
            inner = doc.createElement('comment')
            inner.appendChild(doc.createTextNode('\n//@Inner\n'))
            block_node.insertBefore(inner, block_node.lastChild)
    return segment