Esempio n. 1
0
def get_context(doc, segment, linear_value):
    """Collect the context that precedes a segment, following callers if needed.

    Three cases are handled, chosen by ``domutil.beyond_parent`` and
    ``domutil.beyond_method`` for the first node of ``segment``:

    * not beyond the parent: the context is the previous AST elements of the
      first node followed by the segment itself;
    * beyond the parent but not beyond the method: recurse on the parent AST
      element with a reduced ``linear_value``;
    * beyond the method: look for the first ``<call>`` in ``doc`` whose name
      equals the enclosing function's name, recurse from that call node and
      record the enclosing function in the returned mapping.

    :param doc: DOM document that is searched for ``<call>`` elements
    :param segment: non-empty list of DOM nodes; only ``segment[0]`` is inspected
    :param linear_value: remaining look-back budget passed to ``domutil``
        (presumably a count of AST elements -- confirm against domutil)
    :return (context, functions): ``context`` is a list of nodes, ``functions``
        maps a called function name to its ``<function>`` node. Both are empty
        when the search leaves the method and no matching call is found.
    """
    node = segment[0]
    context = []
    functions = {}
    if not domutil.beyond_parent(node, linear_value):
        context = domutil.get_previous_AST_elements(node, linear_value)
        context.extend(segment)
    elif not domutil.beyond_method(node, linear_value):
        linear_value -= domutil.get_previous_AST_element_number(node) + 1
        node = domutil.get_parent_AST_element(node)
        context, functions = get_context(doc, [node], linear_value)
    else:
        # The flag is compared against the string 'true' on both debug paths;
        # a bare truthiness test would also fire for a value such as 'false'.
        debug = config.get('show_context_search_debug') == 'true'
        function_node = domutil.get_function_element(node)
        name_node = domutil.get_first_child_by_tagname(function_node, 'name')
        name = domutil.get_text_content(name_node)
        for call_node in doc.getElementsByTagName('call'):
            call_name_node = domutil.get_first_child_by_tagname(call_node, 'name')
            call_name = domutil.get_text_content(call_name_node)
            if call_name == name:
                if debug:
                    blue('beyound method '+name + ' and found call node: ')
                # Only the first matching call site is followed: we return
                # right after the recursive lookup.
                linear_value -= domutil.get_previous_AST_element_number_until_function(node) + 1
                context, functions = get_context(doc, [call_node], linear_value)
                functions[call_name] = function_node
                return context, functions
        if debug:
            yellow('beyound method '+name+' but did not found call node: ')
    return context, functions
Esempio n. 2
0
def parse_function(node):
    """Break a <function> element into return type, name and parameters.

    :param node: DOM element whose tag name is 'function'
    :return (type_name, function_name, params) where ``params`` is the result
        of ``parse_parameter_list`` on the <parameter_list> child
    """
    assert domutil.is_element(node) and node.tagName == 'function'
    # Look up the three children first, then convert them, in that order.
    type_node, name_node, parameter_list_node = (
        domutil.get_first_child_by_tagname(node, tag)
        for tag in ('type', 'name', 'parameter_list')
    )
    return (
        domutil.get_text_content(type_node),
        domutil.get_text_content(name_node),
        parse_parameter_list(parameter_list_node),
    )
Esempio n. 3
0
def get_struct_alias(code):
    """Return the alias introduced by a ``typedef struct ... {} name;`` snippet.

    Only the first <typedef> found in the parsed code is considered.

    :param code: source text handed to ``domutil.get_doc_from_code``
    :return the text of the typedef's <name> child, or None when there is no
        <typedef> or its <type> has no <struct> child
    """
    typedef_nodes = domutil.get_doc_from_code(code).getElementsByTagName('typedef')
    if not typedef_nodes:
        return None
    first_typedef = typedef_nodes[0]
    type_node = domutil.get_first_child_by_tagname(first_typedef, 'type')
    if not domutil.get_first_child_by_tagname(type_node, 'struct'):
        logger.warning('it is not a structure')
        return None
    return domutil.get_text_content(
        domutil.get_first_child_by_tagname(first_typedef, 'name'))
Esempio n. 4
0
def _collect_public_names(doc, tag_name, target):
    """Add the <name> text of every ``tag_name`` element in ``doc`` to ``target``.

    Empty names and names starting with an underscore are skipped.
    """
    for element in doc.getElementsByTagName(tag_name):
        name_node = domutil.get_first_child_by_tagname(element, 'name')
        name = domutil.get_text_content(name_node)
        if name and not name.startswith('_'):
            target.add(name)


def parse_recursive(include_path):
    """Parse a header and, transitively, every header it includes.

    Headers that ``include_path_to_full_path`` cannot locate are logged and
    skipped. Names that are empty or start with an underscore are ignored.

    :param include_path: header name as written in an include, e.g. stdio.h
    :return a single set holding the names of all typedefs, structs, macro
        defines and function declarations that were found
    """
    logger.info('parsing: '+include_path)
    typedefs = set()
    defines = set()
    functions = set()
    to_parse_set = {include_path}
    parsed_set = set()
    while to_parse_set:
        include_path = to_parse_set.pop()
        parsed_set.add(include_path)
        full_path = include_path_to_full_path(include_path)
        if not full_path:
            logger.warning('this header does not exists: '+include_path)
            continue
        xml = srcmlutil.get_xml_from_file(full_path)
        doc = parseString(xml)
        # typedef aliases and struct tags share one result set
        _collect_public_names(doc, 'typedef', typedefs)
        _collect_public_names(doc, 'struct', typedefs)
        # macro names sit one level deeper: <cpp:define><cpp:macro><name>
        for define_node in doc.getElementsByTagName('cpp:define'):
            name_node = domutil.get_first_child_by_tagnames(define_node, 'cpp:macro', 'name')
            name = domutil.get_text_content(name_node)
            if name and not name.startswith('_'):
                defines.add(name)
        _collect_public_names(doc, 'function_decl', functions)
        for include_node in doc.getElementsByTagName('cpp:include'):
            name_node = domutil.get_first_child_by_tagname(include_node, 'cpp:file')
            name = domutil.get_text_content(name_node)
            name = name[1:-1] # remove "<>"
            # Adding to a set is idempotent, so only the already-parsed check
            # is needed; this avoids building a union set per include.
            if name not in parsed_set:
                to_parse_set.add(name)
    return typedefs | defines | functions
Esempio n. 5
0
def func(directory):
    """Split multi-variable declarations in every ``.c`` file under a directory.

    Each file is scanned line by line. A line that ends with ';', contains a
    ',' and has balanced parentheses is parsed; if it is a <decl_stmt> with
    more than one <decl> it counts as a change. When ``get_type`` yields a
    type and the line has no '(', the line is rewritten so every variable
    gets its own declaration. A file is written back only when at least one
    line was actually rewritten, so untouched files keep their content and
    modification time.

    :param directory: root directory that is walked recursively
    :return None; progress is reported with ``print``
    """
    for root, _, files in os.walk(directory):
        for basename in files:
            if not basename.endswith('.c'):
                continue
            filename = os.path.join(root, basename)
            print(filename)
            change_count = 0
            modified = False
            out_lines = []
            with open(filename) as src:
                for line in src:
                    if (line.strip().endswith(';') and ',' in line
                            and line.count('(') == line.count(')')):
                        doc = domutil.get_doc_from_code(line)
                        unit_node = doc.getElementsByTagName('unit').item(0)
                        decl_stmt_node = domutil.get_first_child_by_tagname(unit_node, 'decl_stmt')
                        if domutil.is_element(decl_stmt_node) and decl_stmt_node.tagName == 'decl_stmt':
                            decl_nodes = domutil.get_children_by_tagname(decl_stmt_node, 'decl')
                            if len(decl_nodes) > 1:
                                change_count += 1
                                full_type = get_type(line)
                                # FIXME job *job, *sj = deserializeJob(nextjob,remlen,&nextjob,SER_MESSAGE);
                                # Lines containing '(' are counted but left as-is,
                                # since a naive split on ',' would break call arguments.
                                if full_type and '(' not in line:
                                    variables = line.split(',')
                                    line = variables[0] + ';\n' + ';\n'.join(
                                        [full_type + ' ' + var for var in variables[1:]])
                                    modified = True
                    out_lines.append(line)
            print('made '+str(change_count)+' changes to file: ' + filename)
            if modified:
                with open(filename, 'w') as dst:
                    dst.write(''.join(out_lines))
Esempio n. 6
0
def parse_decl(node):
    """Extract the type and variable name from a <decl> element.

    The statement is assumed to declare a single variable. An array suffix on
    the name (everything from the first '[') is moved onto the type, so a
    name ``buf[10]`` with type ``char`` becomes ``('char[10]', 'buf', None)``.

    :param node: DOM element whose tag name is 'decl'
    :return (type, name, init); ``init`` is always None (not implemented yet)
    """
    assert domutil.is_element(node) and node.tagName == 'decl'
    type_node = domutil.get_first_child_by_tagname(node, 'type')
    name_node = domutil.get_first_child_by_tagname(node, 'name')
    type_name = domutil.get_text_content(type_node)
    var_name = domutil.get_text_content(name_node)
    bracket = var_name.find('[')
    if bracket >= 0:
        type_name += var_name[bracket:]
        var_name = var_name[:bracket]
    # TODO init
    return (type_name, var_name, None)
Esempio n. 7
0
def parse_decl_stmt(node):
    """Parse the first <decl> child of a <decl_stmt> element.

    The statement is assumed to declare a single variable; any further <decl>
    children are ignored.

    :param node: DOM element whose tag name is 'decl_stmt'
    :return (type, name, init) as produced by ``parse_decl``
    """
    assert domutil.is_element(node) and node.tagName == 'decl_stmt'
    return parse_decl(domutil.get_first_child_by_tagname(node, 'decl'))
Esempio n. 8
0
def parse_struct(node):
    """Read the name and fields of a <struct> element.

    Only <decl_stmt> children of the struct's <block> are turned into fields.

    :param node: DOM element whose tag name is 'struct'
    :return (name, [(type1, field1), (type2, field2), ...]); ``name`` is the
        empty string when the struct has no <name> child
    """
    assert domutil.is_element(node) and node.tagName == 'struct'
    name_node = domutil.get_first_child_by_tagname(node, 'name')
    block_node = domutil.get_first_child_by_tagname(node, 'block')
    name = domutil.get_text_content(name_node) if name_node else ''
    # keep (type, name) and drop the initializer slot
    fields = [
        parse_decl_stmt(stmt)[0:2]
        for stmt in domutil.get_children_by_tagname(block_node, 'decl_stmt')
    ]
    return (name, fields)
Esempio n. 9
0
def get_undefined_vars(nodes, resolved):
    """Walk DOM nodes and collect names that are used but not defined.

    Declarations, ``for`` init variables and function parameters met on the
    way are added to ``resolved`` (the set is mutated in place). Children are
    visited with a fresh union of ``resolved`` and the names found so far, so
    names declared inside a child do not leak back to this level.

    :param nodes: an iterable of dom nodes
    :param resolved: a set of names (strings) assumed to be defined
    :return a set of undefined names
    """
    undefined = set()
    preprocessor_tags = ('cpp:ifdef', 'cpp:elif', 'cpp:ifndef')
    for node in nodes:
        if not domutil.is_element(node):
            continue
        tag = node.tagName
        if tag == 'decl_stmt':
            _, declared, _ = syntaxutil.parse_decl_stmt(node)
            resolved.add(declared)
        elif tag == 'expr':
            # expressions inside #ifdef/#elif/#ifndef (e.g. `defined(__sun)`)
            # are skipped entirely, children included
            if any(domutil.in_node(node, t, level=2) for t in preprocessor_tags):
                continue
            for name in syntaxutil.parse_expr(node):
                # names known to the system resolver: uint8_t, false, true, NULL
                if sys.resolve_single(name):
                    continue
                if name not in resolved:
                    undefined.add(name)
        elif tag == 'for':
            init_node = domutil.get_first_child_by_tagname(node, 'init')
            if init_node:
                _, loop_var = syntaxutil.parse_for_init(init_node)
                if loop_var:
                    resolved.add(loop_var)
        elif tag == 'parameter_list':
            resolved.update(
                name for _, name in syntaxutil.parse_parameter_list(node))
        elif tag == 'cpp:define':
            value_node = domutil.get_first_child_by_tagname(node, 'cpp:value')
            text = domutil.get_text_content(value_node)
            # upper-case identifiers in a macro body are treated as references
            undefined.update(
                m for m in re.findall(r'([A-Z_]{2,})', text) if m not in resolved)
        undefined |= get_undefined_vars(node.childNodes, resolved | undefined)
    return undefined
Esempio n. 10
0
def extract_calls(node):
    """List the name of every <call> element found below ``node``.

    :param node: DOM node providing ``getElementsByTagName``
    :return a list of names as strings, one per <call>, in document order
    """
    return [
        domutil.get_text_content(domutil.get_first_child_by_tagname(call, 'name'))
        for call in node.getElementsByTagName('call')
    ]
Esempio n. 11
0
def get_struct_name(code):
    """Return the name of the first <struct> found in a code snippet.

    :param code: source text handed to ``domutil.get_doc_from_code``
    :return the text of the struct's <name> child, or None when the snippet
        contains no <struct>
    """
    struct_nodes = domutil.get_doc_from_code(code).getElementsByTagName('struct')
    if not struct_nodes:
        return None
    name_node = domutil.get_first_child_by_tagname(struct_nodes[0], 'name')
    return domutil.get_text_content(name_node)
Esempio n. 12
0
def extract_to_resolve(node, resolved):
    """Gather every name below ``node`` that still has to be resolved.

    Sources of names: called functions, type names, type casts and plain
    words inside macro values, names produced by ``parse_cpp_define``, enum
    names and undefined variables.

    :param node: dom node that needs resolving
    :param resolved: set of names already resolved; passed on to
        ``io.get_undefined_vars`` and subtracted from the result
    :return a set of names
    """
    preprocessor_tags = ('cpp:ifdef', 'cpp:elif', 'cpp:ifndef')

    def name_of(element):
        return domutil.get_text_content(
            domutil.get_first_child_by_tagname(element, 'name'))

    functions = set()
    for call_node in node.getElementsByTagName('call'):
        # calls inside #ifdef/#elif/#ifndef (e.g. `defined(__sun)`) are ignored
        if any(domutil.in_node(call_node, t, level=2) for t in preprocessor_tags):
            continue
        functions.add(name_of(call_node))

    types = set()
    for type_node in node.getElementsByTagName('type'):
        if not domutil.in_node(type_node, 'cpp:define', level=4):
            types.add(name_of(type_node))
    for value_node in node.getElementsByTagName('cpp:value'):
        types |= syntaxutil.parse_type_cast(domutil.get_text_content(value_node))

    for define_node in node.getElementsByTagName('cpp:define'):
        functions |= syntaxutil.parse_cpp_define(define_node)

    # every word of a macro value is a candidate as well
    unknown = set()
    for value_node in node.getElementsByTagName('cpp:value'):
        unknown.update(re.findall(r'\b\w+\b', domutil.get_text_content(value_node)))

    # A function whose return type is an enum is not marked as <function>,
    # so enum names are queued as functions; real enums are sorted out in
    # resolver/localfunc.py.
    for enum_node in node.getElementsByTagName('enum'):
        functions.add(name_of(enum_node))

    variables = io.get_undefined_vars([node], resolved)
    for names in (functions, types, variables):
        names.discard('')
    return (functions | types | variables | unknown) - resolved
Esempio n. 13
0
def parse_typedef(node):
    """Split a <typedef> element into its alias and the original type text.

    Examples of accepted input:
    <typedef>typedef <type>struct <name>A</name> *</type> <name>hello_t</name>;</typedef>
    <typedef>typedef <type><struct>struct <name>_stritem</name> <block> ... </struct></type> <name>item</name>;</typedef>

    A typedef without a <type> child is treated as wrapping a <function_decl>
    (i.e. typedef void *func(int a, int b)); its name becomes the alias and
    the original is the empty string.

    :param node: DOM element whose tag name is 'typedef'
    :return (alias, original), both stripped of surrounding whitespace
    """
    assert domutil.is_element(node) and node.tagName == 'typedef'
    type_node = domutil.get_first_child_by_tagname(node, 'type')
    if type_node:
        name_node = domutil.get_first_child_by_tagname(node, 'name')
        # text of the type with any <block> content left out
        original = domutil.get_text_content_except(type_node, 'block')
        alias = domutil.get_text_content(name_node)
        return (alias.strip(), original.strip())
    function_decl_node = domutil.get_first_child_by_tagname(node, 'function_decl')
    name_node = domutil.get_first_child_by_tagname(function_decl_node, 'name')
    return (domutil.get_text_content(name_node).strip(), '')
Esempio n. 14
0
def parse_parameter_list(node):
    """Parse the <parameter_list> of a function.

    <param> children without a <decl> are skipped.

    :param node: DOM element whose tag name is 'parameter_list'
    :return a list [(type1, var1), (type2, var2), ...]
    """
    assert domutil.is_element(node) and node.tagName == 'parameter_list'
    params = []
    for param_node in domutil.get_children_by_tagname(node, 'param'):
        decl_node = domutil.get_first_child_by_tagname(param_node, 'decl')
        if not decl_node:
            continue
        type_name, var_name, _ = parse_decl(decl_node)
        params.append((type_name, var_name))
    return params
Esempio n. 15
0
def parse_for_init(node):
    """Parse the <init> part of a ``for`` statement.

    Only the first <decl> is looked at; it is assumed to declare one variable.

    :param node: DOM element whose tag name is 'init'
    :return (type, var), or (None, None) when the init has no <decl>
    """
    assert domutil.is_element(node) and node.tagName == 'init'
    decl_node = domutil.get_first_child_by_tagname(node, 'decl')
    if not decl_node:
        return (None, None)
    type_name, var_name, _ = parse_decl(decl_node)
    return (type_name, var_name)
Esempio n. 16
0
def parse_cpp_define(node):
    """Find the function-like names used in the value of a <cpp:define>.

    The text of the <cpp:value> child is parsed again as code; the names of
    all <call> and <macro> elements in that parse are collected.

    :param node: a <cpp:define> DOM element
    :return a set of function names to resolve
    """
    value_node = domutil.get_first_child_by_tagname(node, 'cpp:value')
    doc = domutil.get_doc_from_code(domutil.get_text_content(value_node))
    to_resolve = set()
    # Something like `emitf(__LINE__, "\t" __VA_ARGS__` comes back as <macro>
    # instead of <call>, so both tags are scanned (calls first).
    for tag in ('call', 'macro'):
        for element in doc.getElementsByTagName(tag):
            name_node = domutil.get_first_child_by_tagname(element, 'name')
            to_resolve.add(domutil.get_text_content(name_node))
    return to_resolve
Esempio n. 17
0
def resolve_undefined_vars(nodes, resolved):
    """Find undefined variables and look up a local type for each of them.

    Declarations and ``for`` init variables met on the way are added to
    ``resolved`` (the set is mutated in place). Children are visited with a
    fresh union of ``resolved`` and the names typed so far.

    :param nodes: an iterable of dom nodes
    :param resolved: a set of names (strings) assumed to be defined
    :return {var: type, ...} for every undefined name whose type was found
    """
    typed = {}
    preprocessor_tags = ('cpp:ifdef', 'cpp:elif', 'cpp:ifndef')
    for node in nodes:
        if not domutil.is_element(node):
            continue
        tag = node.tagName
        if tag == 'decl_stmt':
            _, declared, _ = syntaxutil.parse_decl_stmt(node)
            resolved.add(declared)
        elif tag == 'expr':
            # expressions inside #ifdef/#elif/#ifndef (e.g. `defined(__sun)`)
            # are skipped entirely, children included
            if any(domutil.in_node(node, t, level=2) for t in preprocessor_tags):
                continue
            for name in syntaxutil.parse_expr(node):
                # names known to the system resolver: uint8_t, false, true, NULL
                if sys.resolve_single(name):
                    continue
                if name in resolved or name in typed:
                    continue
                # an undefined variable: try to find its type locally
                type_name = resolve_local_type(name, node)
                if type_name:
                    typed[name] = type_name
                elif not local.check_global_variable(name):
                    # global variables are expected to be untyped here,
                    # so only the remaining names are reported
                    logger.warning('undefind variable ' + name + ' not resovled for local type.')
        elif tag == 'for':
            init_node = domutil.get_first_child_by_tagname(node, 'init')
            if init_node:
                _, loop_var = syntaxutil.parse_for_init(init_node)
                if loop_var:
                    resolved.add(loop_var)
        typed.update(resolve_undefined_vars(node.childNodes, resolved | typed.keys()))
    return typed
Esempio n. 18
0
def instrument_segment(doc, segment):
    """Surround a segment with ``//@Pre`` and ``//@Post`` marker comments.

    A <comment> element is inserted before the first node of the segment and
    another one before the next sibling of the last node. Both markers are
    also added to ``segment`` itself, which is modified in place. When the
    config value 'instrument_loop' is the string 'true', every ``for`` or
    ``while`` element in the segment that has a <block> child additionally
    gets a ``//@Inner`` comment inserted before the block's last child.

    :param doc: DOM document used to create the new nodes
    :param segment: non-empty list of sibling DOM nodes
    :return the same ``segment`` list, now starting with the Pre marker and
        ending with the Post marker
    """
    first_node = segment[0]
    last_node = segment[-1]
    parent = first_node.parentNode
    pre = doc.createElement('comment')
    pre.appendChild(doc.createTextNode('\n//@Pre\n'))
    parent.insertBefore(pre, first_node)
    post = doc.createElement('comment')
    post.appendChild(doc.createTextNode('\n//@Post\n'))
    parent.insertBefore(post, last_node.nextSibling)
    segment.insert(0, pre)
    segment.append(post)
    # loop invariant
    if config.get('instrument_loop') == 'true':
        for node in segment:
            # The element check has to guard both tag comparisons: without it
            # `a and b or c` reads tagName on non-element nodes such as text.
            if not (domutil.is_element(node) and node.tagName in ('for', 'while')):
                continue
            block_node = domutil.get_first_child_by_tagname(node, 'block')
            if not block_node:
                continue
            inner = doc.createElement('comment')
            inner.appendChild(doc.createTextNode('\n//@Inner\n'))
            # presumably lastChild is the closing-brace text, which would put
            # the marker at the end of the loop body -- confirm against srcML
            block_node.insertBefore(inner, block_node.lastChild)
    return segment