Exemple #1
0
def which_fix_goes_first(program, fix1, fix2):
    """Pick which of two fixes should be handled first for ``program``.

    The line number of each fix is parsed by ``extract_line_number`` after
    dropping the fix's first token.  A fix is considered "used" when
    ``fix_ids_are_in_program`` accepts the program text from the fix's line
    onwards.  Decision rules, in order:

    1. Exactly one fix is used: return that fix.
    2. Neither is used: raise ``CouldNotFindUsesForEitherException``.
    3. Both are used: return the fix with the smaller line number.
    4. Same line number: return the fix whose identifier token is met first
       when scanning the program from that line.

    Returns:
        ``fix1`` or ``fix2`` (the object passed in, unchanged).

    Raises:
        CouldNotFindUsesForEitherException: neither fix is used (rule 2).
        AssertionError: a fix has more than one ``_<id>_`` token before its
            first ``_<op>_[`` token, both fixes share the same identifier,
            or the tie-break scan finds neither identifier.
        Whatever ``extract_line_number`` raises for an unparsable fix.
    """

    def _leading_id(fix):
        # The only '_<id>_' token allowed before the first '_<op>_[' token.
        found = None
        for token in fix.split():
            if '_<id>_' in token:
                assert found is None, fix
                found = token
            elif token == '_<op>_[':
                break
        return found

    fix1_location = extract_line_number(' '.join(fix1.split()[1:]))
    fix2_location = extract_line_number(' '.join(fix2.split()[1:]))

    prog_lines = get_lines(program)

    # Evaluate each usage check once (fix2 first, matching the original
    # evaluation order) instead of re-running the helpers per condition.
    # Assumes the helpers are side-effect free.
    fix2_used = fix_ids_are_in_program(
        recompose_program(prog_lines[fix2_location:]), fix2)
    fix1_used = fix_ids_are_in_program(
        recompose_program(prog_lines[fix1_location:]), fix1)

    if fix1_used and not fix2_used:
        return fix1

    if fix2_used and not fix1_used:
        return fix2

    if not fix1_used and not fix2_used:
        raise CouldNotFindUsesForEitherException

    if fix1_location < fix2_location:
        return fix1
    elif fix2_location < fix1_location:
        return fix2

    # Both fixes target the same line: break the tie on first use.
    id_in_fix1 = _leading_id(fix1)
    id_in_fix2 = _leading_id(fix2)

    assert id_in_fix1 != id_in_fix2, fix1 + ' & ' + fix2
    assert fix1_location == fix2_location

    for i in range(fix1_location, len(prog_lines)):
        for token in prog_lines[i].split():
            if token == id_in_fix1:
                return fix1
            elif token == id_in_fix2:
                return fix2

    # Kept as in the original: an AssertionError normally, and the domain
    # exception when assertions are stripped with -O.
    assert False, 'unreachable code'
    raise CouldNotFindUsesForEitherException
Exemple #2
0
    def _sanitize_brackets(self, tokens_string):
        """Fold lines that consist only of brace tokens into the previous line.

        The program is split with ``get_lines``.  Walking from the last line
        to the first, any line whose stripped text is one of a fixed set of
        brace-only token sequences is appended (space separated) to the line
        above it and blanked.  Blank lines are then dropped and the rest is
        passed to ``recompose_program``.

        Returns:
            The recomposed program, or ``''`` when the very first line is
            itself brace-only (there is no line above to fold it into).

        Raises:
            EmptyProgramException: ``get_lines`` returned exactly one line.
            AssertionError: a whitespace-only line survived the clean-up.
        """
        lines = get_lines(tokens_string)

        if len(lines) == 1:
            # NOTE(review): an earlier reviewer asked whether this should be
            # `lines == ['']`; left unchanged pending confirmation.
            raise EmptyProgramException(tokens_string)

        bracket_only = (
            '_<op>_}',
            '_<op>_} _<op>_}',
            '_<op>_} _<op>_} _<op>_}',
            '_<op>_} _<op>_;',
            '_<op>_} _<op>_} _<op>_} _<op>_}',
            '_<op>_{',
            '_<op>_{ _<op>_{',
        )

        # Bottom-up so that a line which becomes brace-only after a merge is
        # itself considered for merging on the next step.
        for i in range(len(lines) - 1, -1, -1):
            stripped = lines[i].strip()

            if stripped in bracket_only:
                if i > 0:
                    lines[i - 1] += ' ' + stripped
                    lines[i] = ''
                else:
                    # can't handle this case!
                    return ''

        # Remove empty lines
        lines = [line for line in lines if line != '']

        # Check every remaining line; the previous code re-checked a single
        # stale index (`lines[i]`) on each iteration.
        for line in lines:
            assert line.strip() != ''

        return recompose_program(lines)
def meets_criterion(incorrect_program_tokens, fix, name_dict, type_, name_seq=None, silent=True):
    """Return True when ``fix`` is an acceptable edit for the given program.

    The fix is first passed through ``_truncate_fix``.  It is rejected
    (``False``) when any of the following holds:

    * ``_is_stop_signal`` reports it as a stop signal;
    * ``extract_line_number`` raises for it;
    * the extracted line number is not below the number of program lines;
    * the targeted program line and the fix contain a different number of
      ``_<id>_`` tokens;
    * ``type_ == 'replace'`` and the keyword/type/APIcall/include tokens of
      the targeted line differ from those of the fix.

    ``name_dict`` and ``name_seq`` are accepted but not used here.  When
    ``silent`` is false the reason for the last two rejections is printed.
    """
    program_lines = get_lines(incorrect_program_tokens)
    fix = _truncate_fix(fix)

    if _is_stop_signal(fix):
        return False

    try:
        target_index = extract_line_number(fix)
    except Exception:
        # Any failure to read the line number means the fix is unusable.
        return False

    if target_index >= len(program_lines):
        # The fix points at a line the program does not have.
        return False

    target_line = program_lines[target_index]

    # Make sure number of IDs is the same
    id_pattern = r'_<id>_\w*'
    ids_in_line = re.findall(id_pattern, target_line)
    ids_in_fix = re.findall(id_pattern, fix)
    if len(ids_in_line) != len(ids_in_fix):
        if not silent:
            print('number of ids is not the same')
        return False

    if type_ == 'replace':
        keywords_regex = r'_<keyword>_\w+|_<type>_\w+|_<APIcall>_\w+|_<include>_\w+'
        if re.findall(keywords_regex, target_line) != re.findall(keywords_regex, fix):
            if not silent:
                print('important words (keywords, etc.) change drastically')
            return False

    return True
def meets_criterion(incorrect_program_tokens, fix, type_, silent=True):
    """Check whether ``fix`` is an acceptable edit for the given program.

    The fix is truncated with ``_truncate_fix`` and then rejected
    (``False``) if it is a stop signal, if ``extract_line_number`` raises
    ``FailedToGetLineNumberException``, if the line number is past the end
    of the program, if the number of ``_<id>_`` tokens differs between the
    targeted line and the fix, or - for ``type_ == 'replace'`` only - if
    their keyword/type/APIcall/include tokens differ.  Otherwise returns
    ``True``.  With ``silent`` false, the two token checks print why they
    failed.
    """

    def _reject(reason):
        # Report the reason unless silenced; the verdict is always False.
        if not silent:
            print(reason)
        return False

    all_lines = get_lines(incorrect_program_tokens)
    fix = _truncate_fix(fix)

    if _is_stop_signal(fix):
        return False

    try:
        line_no = extract_line_number(fix)
    except FailedToGetLineNumberException:
        return False

    if not line_no < len(all_lines):
        return False

    current = all_lines[line_no]

    # Make sure number of IDs is the same
    count_ids = lambda text: len(re.findall(r'_<id>_\w*', text))
    if count_ids(current) != count_ids(fix):
        return _reject('number of ids is not the same')

    keywords_regex = r'_<keyword>_\w+|_<type>_\w+|_<APIcall>_\w+|_<include>_\w+'

    if type_ != 'replace':
        return True

    if re.findall(keywords_regex, current) != re.findall(keywords_regex, fix):
        return _reject('important words (keywords, etc.) change drastically')

    return True
Exemple #5
0
def add_fix_number(corrupted_prog, fix_number):
    """Append a ``FixNumber`` pseudo-include line to a tokenized program.

    The program is split with ``get_lines``, a final line of the form
    ``_<directive>_#include _<include>_<FixNumber_N>`` (``N`` being
    ``fix_number`` formatted with ``%d``) is added, and the result of
    ``recompose_program`` is returned.  If ``get_lines`` fails, the program
    is printed and the exception is re-raised.
    """
    try:
        program_lines = get_lines(corrupted_prog)
    except Exception:
        # Dump the offending input before letting the error propagate.
        print(corrupted_prog)
        raise

    marker = '_<directive>_#include _<include>_<FixNumber_%d>' % fix_number
    return recompose_program(program_lines + [marker])
Exemple #6
0
def do_fix_at_line(corrupted_prog, line, fix):
    """Overwrite line ``line`` of a tokenized program with ``fix``.

    The program is split with ``get_lines``; the entry at index ``line`` is
    replaced by ``fix`` and the list is handed to ``recompose_program``.
    If ``get_lines`` fails, the program is printed and the exception is
    re-raised.  An out-of-range ``line`` raises ``IndexError``.
    """
    try:
        program_lines = get_lines(corrupted_prog)
    except Exception:
        print(corrupted_prog)
        raise

    # No handler here: an IndexError from a bad index goes to the caller.
    program_lines[line] = fix

    return recompose_program(program_lines)
def apply_fix(program, fix, kind='replace', check_literals=False):
    """Apply a predicted fix to a tokenized program and return the result.

    Args:
        program: tokenized program text, split with ``get_lines``.
        fix: fix text; it is truncated with ``_truncate_fix`` and must then
            contain exactly one ``~``.
        kind: ``'replace'`` to rewrite the targeted line (via
            ``replace_ids``) or ``'insert'`` to insert the fix after it.
        check_literals: for replacements, also require that the number of
            string/char/number literal markers is unchanged.

    Returns:
        The program rebuilt by ``recompose_program``.

    Raises:
        InvalidFixLocationException: the fix does not have two ``~`` parts,
            its line number cannot be extracted, or (replace) the line index
            is out of range.
        SubstitutionFailedException: (replace) identifier or literal counts
            differ between the targeted line and the fix.
        AssertionError: ``kind`` is neither ``'replace'`` nor ``'insert'``.
    """
    # Break up program string into lines
    lines = get_lines(program)

    # Truncate the fix
    fix = _truncate_fix(fix)

    # Make sure there are two parts
    if len(fix.split('~')) != 2:
        raise InvalidFixLocationException

    # Retrieve insertion location
    try:
        if kind == 'replace':
            fix_location = extract_line_number(fix)
        else:
            assert kind == 'insert'

            fix_tokens = fix.split()
            if fix_tokens[0] != '_<insertion>_':
                print("Warning: First token did not suggest insertion (should not happen)")

            # Skip the marker token and keep ALL remaining tokens.  The old
            # `[1]` kept a single token, so a line number written as several
            # digit tokens ("1 2 ~ ...") was read as its first digit only.
            fix_location = extract_line_number(' '.join(fix_tokens[1:]))
    except FailedToGetLineNumberException:
        raise InvalidFixLocationException

    # Remove line number
    fix = _remove_line_number(fix)

    # Insert the fix
    if kind == 'replace':
        try:
            if lines[fix_location].count('_<id>_') != fix.count('_<id>_'):
                raise SubstitutionFailedException
            if check_literals:
                for lit in ['string', 'char', 'number']:
                    # Use the same marker on both sides; previously the
                    # program side was counted without the trailing '_'.
                    marker = '_<%s>_' % lit
                    if lines[fix_location].count(marker) != fix.count(marker):
                        raise SubstitutionFailedException

            lines[fix_location] = replace_ids(fix, lines[fix_location])
        except IndexError:
            raise InvalidFixLocationException
    else:
        assert kind == 'insert'
        lines.insert(fix_location + 1, fix)

    return recompose_program(lines)
Exemple #8
0
def undeclare_variable(rng, program_string):
    """Remove one variable declaration and describe the fix that restores it.

    Collects the distinct ``_<id>_`` tokens of ``program_string`` in order
    of first appearance, asks ``find_declaration`` for a declaration and its
    line index, and asks ``insert_fix`` for the line the fix refers to.
    If the declaration's line is blank afterwards it is deleted
    (presumably ``find_declaration`` edits the line list in place - verify
    against that helper).

    Returns:
        ``(program, fix, fix_line)`` where ``program`` comes from
        ``recompose_program`` and ``fix`` has the form
        ``_<insertion>_ <digits of fix_line, space separated> ~ <declaration>``.
    """
    # Lines
    orig_lines = get_lines(program_string)

    # Variables, de-duplicated while keeping first-seen order
    variables = []
    for token in program_string.split():
        if '_<id>_' not in token or token in variables:
            continue
        variables.append(token)

    # Look for a declaration among all line indices
    all_indices = list(range(len(orig_lines)))
    declaration, declaration_pos = find_declaration(
        rng, variables, all_indices, orig_lines)

    # Find the function signature
    fix_line = insert_fix(declaration_pos, orig_lines)
    spaced_digits = ' '.join(str(fix_line))
    fix = '_<insertion>_ {} ~ {}'.format(spaced_digits, declaration)

    # Drop the declaration's line when nothing is left on it
    if not orig_lines[declaration_pos].strip():
        del orig_lines[declaration_pos]

    return recompose_program(orig_lines), fix, fix_line
Exemple #9
0
def do_fix_at_line(corrupted_prog, line, fix):
    """Overwrite line ``line`` of a tokenized program with ``fix``.

    When ``fix`` contains ``~`` only the text after the first ``' ~ '``
    separator (stripped) is written, i.e. a leading line-number part is
    discarded.  The program is split with ``get_lines`` and rebuilt with
    ``recompose_program``.

    Raises:
        IndexError: ``line`` is out of range, or ``fix`` contains ``~`` but
            no ``' ~ '`` separator (the fix is printed first in that case).
        Whatever ``get_lines`` raises (the program is printed first).
    """
    try:
        lines = get_lines(corrupted_prog)
    except Exception:
        print(corrupted_prog)
        raise

    if '~' in fix:
        parts = fix.split(' ~ ')
        try:
            fix = parts[1].strip()
        except IndexError:
            # Narrowed from a bare `except:`; a missing second part is the
            # failure this lookup can produce.
            print('%s %s' % (fix, parts))
            raise

    # An out-of-range index raises IndexError to the caller.
    lines[line] = fix
    return recompose_program(lines)
Exemple #10
0
def token_mutate_for_tsne_with_specific_errors(prog, num_mutations, action, include_kind=False):
    """Corrupt ``prog`` with ``num_mutations`` mutations of one ``action``.

    ``mutator_obj.specific_mutate`` (module-level ``mutator_obj``) is called
    repeatedly until ``num_mutations`` calls have changed a line relative to
    ``prog``; the 50th attempt raises ``LoopCountThresholdExceededException``.

    Returns:
        ``[]`` if the corrupted program contains a blank line; otherwise a
        list with at most one ``(corrupted_program, fix)`` pair, where
        ``fix`` is ``fetch_line(prog, line)`` for the lowest-numbered line
        that still differs.

    ``include_kind`` only changes how ``specific_mutate`` is called; the
    kinds it reports are collected but do not appear in the result.
    """
    assert num_mutations > 0, "Invalid argument(s) supplied to the function token_mutate"
    specific_mutate = mutator_obj.specific_mutate

    pairs = set()
    corrupted = prog
    changed_lines = set()
    applied = 0
    attempts = 0
    max_attempts = 50
    kinds_by_line = {}

    while applied < num_mutations:
        attempts += 1
        if attempts == max_attempts:
            print('%s %s' % ("mutation_count", applied))
            raise LoopCountThresholdExceededException

        if include_kind:
            corrupted, _unused_fix, line, kind = specific_mutate(
                prog, corrupted, action, include_kind=True)
        else:
            corrupted, _unused_fix, line = specific_mutate(prog, corrupted, action)

        if line is None:
            continue

        # Only count the mutation if the line really differs from the source.
        expected = fetch_line(prog, line)
        actual = fetch_line(corrupted, line)
        if expected == actual:
            continue

        changed_lines.add(line)
        applied += 1
        if include_kind:
            kinds_by_line.setdefault(str(line), []).append(kind)

    assert len(changed_lines) > 0, "Could not mutate!"

    # A corrupted program containing a blank line yields no pair at all.
    for text in get_lines(corrupted):
        if text.strip() == '':
            return list(pairs)

    for line in sorted(changed_lines):
        fix = fetch_line(prog, line)
        corrupt_line = fetch_line(corrupted, line)
        assert len(fetch_line(prog, line, include_line_number=False).strip()) != 0, "empty fix"
        assert len(fetch_line(corrupted, line, include_line_number=False).strip()) != 0, "empty corrupted line"
        if fix != corrupt_line:
            # Only the first differing line is reported.
            pairs.add((corrupted, fix))
            break

    return list(pairs)
Exemple #11
0
def token_mutate_series_any_fix(prog, max_num_mutations, num_mutated_progs, include_kind=False):
    """Build (corrupted program, fix) training pairs from ``prog``.

    For each of ``num_mutated_progs`` rounds a random number of mutations
    (1..``max_num_mutations``) is applied with ``mutator_obj.easy_mutate2``
    (module-level ``mutator_obj``); the 50th attempt in a round raises
    ``LoopCountThresholdExceededException``.  Rounds whose corrupted program
    contains a blank line are skipped.  The changed lines are ranked by line
    number (1-based), visited in random order, recorded, and repaired one at
    a time with ``do_fix_at_line`` so later pairs see the earlier repairs.

    Returns:
        A list of ``(add_fix_number(corrupted, rank), fix)`` tuples, or
        ``(add_fix_number(corrupted, rank), fix, kind)`` when
        ``include_kind`` is true.  With ``include_kind``, lines that
        received more than one mutation are not recorded.
    """
    assert max_num_mutations > 0 and num_mutated_progs > 0, "Invalid argument(s) supplied to the function token_mutate"

    collected = set()

    for _ in range(num_mutated_progs):
        num_mutations = random.choice(range(max_num_mutations)) + 1
        corrupted = prog
        changed = set()
        applied = 0
        attempts = 0
        max_attempts = 50
        kinds_by_line = {}

        while applied < num_mutations:
            attempts += 1
            if attempts == max_attempts:
                print('%s %s' % ("mutation_count", applied))
                raise LoopCountThresholdExceededException

            if include_kind:
                corrupted, _unused_fix, line, kind = mutator_obj.easy_mutate2(
                    prog, corrupted, include_kind=True)
            else:
                corrupted, _unused_fix, line = mutator_obj.easy_mutate2(prog, corrupted)

            if line is None:
                continue

            expected = fetch_line(prog, line)
            actual = fetch_line(corrupted, line)
            if expected == actual:
                continue

            changed.add(line)
            applied += 1
            if include_kind:
                kinds_by_line.setdefault(str(line), []).append(kind)

        assert len(changed) > 0, "Could not mutate!"

        has_blank_line = False
        for text in get_lines(corrupted):
            if text.strip() == '':
                has_blank_line = True
                break
        if has_blank_line:
            continue

        ordered = sorted(changed)
        # Pair every changed line with its 1-based rank in line order.
        ranked_lines = [(ln, rank) for rank, ln in enumerate(ordered, 1)]
        random.shuffle(ranked_lines)
        # `ordered` is not read again, but the shuffle is kept so the random
        # stream advances exactly as before.
        random.shuffle(ordered)

        for line, fix_number in ranked_lines:
            fix = fetch_line(prog, line)
            corrupt_line = fetch_line(corrupted, line)
            assert len(fetch_line(prog, line, include_line_number=False).strip()) != 0, "empty fix"
            assert len(fetch_line(corrupted, line, include_line_number=False).strip()) != 0, "empty corrupted line"
            if fix == corrupt_line:
                continue

            if include_kind:
                kinds = kinds_by_line[str(line)]
                if len(kinds) == 1:  # remove later
                    for kind in kinds:
                        collected.add((corrupted, fix, fix_number, kind))
            else:
                collected.add((corrupted, fix, fix_number))

            # Repair this line so the next pair shows one fewer error.
            corrupted = do_fix_at_line(
                corrupted, line, fetch_line(prog, line, include_line_number=False))

    if include_kind:
        return [(add_fix_number(w, y), x, z) for (w, x, y, z) in list(collected)]
    return [(add_fix_number(w, y), x) for (w, x, y) in list(collected)]
def undeclare_variable(rng,
                       old_program,
                       program_string,
                       deleted_ids,
                       name_dict=None,
                       print_debug_messages=False):
    """Delete one variable declaration and return the fix that restores it.

    Args:
        rng: object providing ``shuffle``; decides which variable and which
            line are tried first.
        old_program: tokenized program used to locate the enclosing function
            signature; must split into as many lines as ``program_string``.
        program_string: tokenized program to mutate.
        deleted_ids: accepted but not used here.
        name_dict: passed to ``tokens_to_source`` and reversed with
            ``get_rev_dict`` for debug output only.
        print_debug_messages: print progress information when true.

    Returns:
        ``(recomposed_program, fix, fix_line)``.  ``fix`` has the form
        ``_<insertion>_ <digits of fix_line, space separated> ~ <declaration>``
        and ``fix_line`` is the index of the first line, at or after the
        enclosing function signature, that contains ``_<op>_{``.

    Raises:
        NothingToMutateException: no declaration of any variable was found.
        FailedToMutateException: no function signature was found above the
            declaration, or no ``{`` follows that signature.
        AssertionError: unbalanced braces, or the two programs differ in
            line count.

    Lines inside struct/union/enum bodies and lines at global scope are
    never candidates.
    """
    rev_name_dict = get_rev_dict(name_dict) if name_dict is not None else None

    # Lines
    orig_lines = get_lines(program_string)
    old_lines = get_lines(old_program)

    brace_open = r'_<op>_\{'
    brace_close = r'_<op>_\}'
    # `\d+` (was `\d`): identifiers with more than one digit must also be
    # recognised, as in every other pattern of this function.
    aggregate_open = r'_<keyword>_(?:struct|union|enum) _<id>_\d+@ _<op>_\{'

    # Lines to ignore: bodies of struct/union/enum definitions
    struct_lines = []
    structs_deep = 0

    for i, line in enumerate(orig_lines):
        if re.search(aggregate_open, line):
            structs_deep += len(re.findall(brace_open, line))
        elif structs_deep > 0:
            structs_deep += len(re.findall(brace_open, line))
            structs_deep -= len(re.findall(brace_close, line))
            assert structs_deep >= 0, str(structs_deep) + " " + line
            struct_lines.append(i)

    # Lines to ignore: brace-free lines at nesting depth zero
    global_lines = []
    brackets_deep = 0

    for i, line in enumerate(orig_lines):
        opened = len(re.findall(brace_open, line))
        closed = len(re.findall(brace_close, line))
        if opened > 0 or closed > 0:
            brackets_deep += opened
            brackets_deep -= closed
            assert brackets_deep >= 0, str(brackets_deep) + " " + line
        elif brackets_deep == 0:
            global_lines.append(i)

    if print_debug_messages:
        print('Ignoring lines: %s' % (struct_lines,))
        print('Ignoring lines: %s' % (global_lines,))

        for line in sorted(set(struct_lines + global_lines)):
            print('- %s' % (orig_lines[line],))

    # Variables, in order of first appearance
    variables = []

    for token in program_string.split():
        if '_<id>_' in token and token not in variables:
            variables.append(token)

    assert len(orig_lines) == len(old_lines)

    def _report(label, pattern, idx):
        # Debug helper: show the matched line and what the pattern captured.
        if print_debug_messages:
            print('On line %d: %s' % (idx, tokens_to_source(
                orig_lines[idx], name_dict, clang_format=True)))
            print('%s %s' % (label, re.findall(pattern, orig_lines[idx])))

    # Look for a declaration
    done = False
    fix_line = None
    declaration = None
    declaration_pos = None

    rng.shuffle(variables)

    candidate_lines = set(range(len(orig_lines))) - set(struct_lines + global_lines)

    for to_undeclare in variables:
        if print_debug_messages:
            # Fall back to the raw token when no name dictionary was given
            # (previously a NameError).
            shown = to_undeclare if rev_name_dict is None else rev_name_dict[to_undeclare]
            print('Looking for: %s ...' % (shown,))

        # Find a location (scope) to undeclare it from; reshuffled for every
        # variable so the random stream is consumed as before.
        shuffled_lines = list(candidate_lines)
        rng.shuffle(shuffled_lines)

        # Single declaration with initializer: "T x = ... ;"
        regex_alone_use = r'(_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)((?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*)(?: _<op>_= [^,;]+)(?: _<op>_;)' % to_undeclare
        # Single declaration without initializer: "T x ;"
        regex_alone = r'((?:_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])* _<op>_;)' % to_undeclare
        # First name of a comma-separated declaration: "T x , y ;"
        regex_group_leader = r'((?:_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)*)( %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*)(?: _<op>_= [^,;]+)?( _<op>_,)(?:(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)? _<op>_,)*(?:(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)? _<op>_;)' % to_undeclare
        # Later name of a comma-separated declaration: "T y , x ;"
        regex_group = r'(_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?(?: _<op>_,(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)*( _<op>_,(?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)(?: _<op>_,(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)*(?: _<op>_;)' % to_undeclare

        for i in shuffled_lines:
            line = orig_lines[i]

            # A pattern only counts when it matches exactly once in the line.
            if len(re.findall(regex_alone_use, line)) == 1:
                _report('Found Alone use', regex_alone_use, i)
                m = re.search(regex_alone_use, line)
                declaration = line[m.start(1):m.end(2)] + ' _<op>_;'
                declaration_pos = i

                # Mutate: drop the type, keep the assignment
                orig_lines[i] = line[:m.start(1)] + line[m.end(1) + 1:]
                done = True
                break

            if len(re.findall(regex_alone, line)) == 1:
                _report('Found Alone', regex_alone, i)
                m = re.search(regex_alone, line)
                declaration = line[m.start(1):m.end(1)]
                declaration_pos = i

                # Mutate: drop the whole declaration
                orig_lines[i] = line[:m.start(1)] + line[m.end(1) + 1:]
                done = True
                break

            elif len(re.findall(regex_group, line)) == 1:
                _report('Found Group', regex_group, i)
                m = re.search(regex_group, line)
                # [8:] strips the leading ' _<op>_,' of the captured entry.
                declaration = line[m.start(1):m.end(1)] + line[
                    m.start(2):m.end(2)][8:] + ' _<op>_;'
                declaration_pos = i

                # NOTE(review): cutting at '_<op>_=' also removes the
                # ' _<op>_;' appended above - confirm this is intended.
                try:
                    end_of_declr = declaration.index('_<op>_=')
                    declaration = declaration[:end_of_declr]
                except ValueError:
                    pass

                # Mutate: drop this entry from the list
                orig_lines[i] = line[:m.start(2) + 1] + line[m.end(2) + 1:]
                done = True
                break

            elif len(re.findall(regex_group_leader, line)) == 1:
                _report('Found Group Leader', regex_group_leader, i)
                m = re.search(regex_group_leader, line)
                declaration = line[m.start(1):m.end(2)] + ' _<op>_;'
                declaration_pos = i

                # Mutate: drop the first entry and its comma
                orig_lines[i] = line[:m.start(2) + 1] + line[m.end(3) + 1:]
                done = True
                break

        if done:
            break

    if not done:
        # Failed to find something to undeclare
        raise NothingToMutateException

    # Find the function signature
    fn_regex = r'(?:_<keyword>_(?:struct|union|enum) _<id>_\d+@|_<type>_\w+|_<keyword>_void)(?: _<op>_\*)* (?:_<id>_\d+@|_<APIcall>_main) _<op>_\('
    fn_start_regex = r'_<op>_\{'
    inserted = False

    assert declaration_pos is not None
    # Walk upwards down to and including line 0 (the old bound skipped it).
    for i in range(declaration_pos, -1, -1):
        if len(re.findall(fn_regex, old_lines[i])) == 1:
            for j in range(i, len(old_lines)):
                # Inspect line j; the old code re-tested line i on every
                # step, so a '{' on a later line was never found.
                if len(re.findall(fn_start_regex, old_lines[j])) >= 1:
                    fix_line = j
                    break
            inserted = True
            break

    if not inserted:
        # Failed to insert fix
        raise FailedToMutateException
    if fix_line is None:
        # Couldn't find { after function definition
        raise FailedToMutateException

    fix = '_<insertion>_ ' + ' '.join(str(fix_line)) + ' ~ ' + declaration

    # Drop the declaration's line when the mutation left it blank
    if orig_lines[declaration_pos].strip() == '':
        del orig_lines[declaration_pos]

    # "<d i g i t s> ~ <line> " for every remaining line
    recomposed_program = ''.join(
        ' '.join(str(i)) + ' ~ ' + line + ' '
        for i, line in enumerate(orig_lines))

    return recomposed_program, fix, fix_line
def undeclare_variable(rng, old_program, program_string):
    """Delete one variable declaration and return the fix that restores it.

    Args:
        rng: object providing ``shuffle``; decides which variable and which
            line are tried first.
        old_program: tokenized program used to locate the enclosing function
            signature.
        program_string: tokenized program to mutate.

    Returns:
        ``(recomposed_program, fix, fix_line)``.  ``fix`` has the form
        ``_<insertion>_ <digits of fix_line, space separated> ~ <declaration>``
        and ``fix_line`` is the index of the first line, at or after the
        enclosing function signature, that contains ``_<op>_{``.

    Raises:
        NothingToMutateException: no declaration of any variable was found.
        FailedToMutateException: no function signature was found above the
            declaration, or no ``{`` follows that signature.
        AssertionError: unbalanced braces.

    Lines inside struct/union/enum bodies and lines at global scope are
    never candidates.  Exactly one declaration is removed per call.
    """
    # Lines
    orig_lines = get_lines(program_string)
    old_lines = get_lines(old_program)

    open_brace = r'_<op>_\{'
    close_brace = r'_<op>_\}'
    # `\d+` (was `\d`): identifiers with more than one digit must also be
    # recognised, as in every other pattern of this function.
    aggregate_header = r'_<keyword>_(?:struct|union|enum) _<id>_\d+@ _<op>_\{'

    # Lines to ignore: bodies of struct/union/enum definitions
    struct_lines = []
    structs_deep = 0

    for i, line in enumerate(orig_lines):
        if re.search(aggregate_header, line):
            structs_deep += len(re.findall(open_brace, line))
        elif structs_deep > 0:
            structs_deep += len(re.findall(open_brace, line))
            structs_deep -= len(re.findall(close_brace, line))
            assert structs_deep >= 0, str(structs_deep) + " " + line
            struct_lines.append(i)

    # Lines to ignore: brace-free lines at nesting depth zero
    global_lines = []
    brackets_deep = 0

    for i, line in enumerate(orig_lines):
        n_open = len(re.findall(open_brace, line))
        n_close = len(re.findall(close_brace, line))
        if n_open > 0 or n_close > 0:
            brackets_deep += n_open
            brackets_deep -= n_close
            assert brackets_deep >= 0, str(brackets_deep) + " " + line
        elif brackets_deep == 0:
            global_lines.append(i)

    # Variables, in order of first appearance
    variables = []

    for token in program_string.split():
        if '_<id>_' in token and token not in variables:
            variables.append(token)

    # Look for a declaration
    done = False
    fix_line = None
    declaration = None
    declaration_pos = None

    rng.shuffle(variables)

    eligible = set(range(len(orig_lines))) - set(struct_lines + global_lines)

    for to_undeclare in variables:

        # Find a location (scope) to undeclare it from
        shuffled_lines = list(eligible)
        rng.shuffle(shuffled_lines)

        # NOTE(review): `const`-qualified and typedef'd declarations are not
        # covered by these patterns.
        # Single declaration with initializer: "T x = ... ;"
        regex_alone_use = r'(_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)((?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*)(?: _<op>_= [^,;]+)(?: _<op>_;)' % to_undeclare
        # Single declaration without initializer: "T x ;"
        regex_alone = r'((?:_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])* _<op>_;)' % to_undeclare
        # First name of a comma-separated declaration: "T x , y ;"
        regex_group_leader = r'((?:_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)*)( %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*)(?: _<op>_= [^,;]+)?( _<op>_,)(?:(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)? _<op>_,)*(?:(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)? _<op>_;)' % to_undeclare
        # Later name of a comma-separated declaration: "T y , x ;"
        regex_group = r'(_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?(?: _<op>_,(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)*( _<op>_,(?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)(?: _<op>_,(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)*(?: _<op>_;)' % to_undeclare

        for i in shuffled_lines:
            text = orig_lines[i]

            # A pattern only counts when it matches exactly once in the line.
            if len(re.findall(regex_alone_use, text)) == 1:
                m = re.search(regex_alone_use, text)
                declaration = text[m.start(1):m.end(2)] + ' _<op>_;'
                declaration_pos = i

                # Mutate: drop the type, keep the assignment
                orig_lines[i] = text[:m.start(1)] + text[m.end(1) + 1:]
                done = True
                break

            if len(re.findall(regex_alone, text)) == 1:
                m = re.search(regex_alone, text)
                declaration = text[m.start(1):m.end(1)]
                declaration_pos = i

                # Mutate: drop the whole declaration
                orig_lines[i] = text[:m.start(1)] + text[m.end(1) + 1:]
                done = True
                break

            elif len(re.findall(regex_group, text)) == 1:
                m = re.search(regex_group, text)
                # [8:] strips the leading ' _<op>_,' of the captured entry.
                declaration = text[m.start(1):m.end(1)] + text[
                    m.start(2):m.end(2)][8:] + ' _<op>_;'
                declaration_pos = i

                # NOTE(review): cutting at '_<op>_=' also removes the
                # ' _<op>_;' appended above - confirm this is intended.
                try:
                    end_of_declr = declaration.index('_<op>_=')
                    declaration = declaration[:end_of_declr]
                except ValueError:
                    pass

                # Mutate: drop this entry from the list
                orig_lines[i] = text[:m.start(2) + 1] + text[m.end(2) + 1:]
                done = True
                break

            elif len(re.findall(regex_group_leader, text)) == 1:
                m = re.search(regex_group_leader, text)
                declaration = text[m.start(1):m.end(2)] + ' _<op>_;'
                declaration_pos = i

                # Mutate: drop the first entry and its comma
                orig_lines[i] = text[:m.start(2) + 1] + text[m.end(3) + 1:]
                done = True
                break

        # Stop after the first successful mutation.  Without this the loop
        # went on to the next variable, which either removed a second
        # declaration (only the last one being reported) or reset
        # `declaration_pos` to None after a success.
        if done:
            break

    if not done:
        # Failed to find something to undeclare
        raise NothingToMutateException

    # Find the function signature
    fn_regex = r'(?:_<keyword>_(?:struct|union|enum) _<id>_\d+@|_<type>_\w+|_<keyword>_void)(?: _<op>_\*)* (?:_<id>_\d+@|_<APIcall>_main) _<op>_\('
    fn_start_regex = r'_<op>_\{'
    inserted = False

    assert declaration_pos is not None
    # Walk upwards down to and including line 0 (the old bound skipped it).
    for i in range(declaration_pos, -1, -1):
        if len(re.findall(fn_regex, old_lines[i])) == 1:
            for j in range(i, len(old_lines)):
                # Inspect line j; the old code re-tested line i on every
                # step, so a '{' on a later line was never found.
                if len(re.findall(fn_start_regex, old_lines[j])) >= 1:
                    fix_line = j
                    break
            inserted = True
            break
    # NOTE(review): an earlier reviewer flagged that a later line using the
    # removed variable in an initializer ("int x = 0; ... int y = x;") may
    # not be handled by the insertion point chosen here.

    if not inserted:
        # Failed to insert fix
        raise FailedToMutateException
    if fix_line is None:
        # Couldn't find { after function definition
        raise FailedToMutateException

    fix = '_<insertion>_ ' + ' '.join(str(fix_line)) + ' ~ ' + declaration

    # Drop the declaration's line when the mutation left it blank
    if orig_lines[declaration_pos].strip() == '':
        del orig_lines[declaration_pos]

    # "<d i g i t s> ~ <line> " for every remaining line
    recomposed_program = ''.join(
        ' '.join(str(i)) + ' ~ ' + line + ' '
        for i, line in enumerate(orig_lines))

    return recomposed_program, fix, fix_line
def apply_fix(program, fix, kind='replace', check_literals=False):
    """Apply a predicted fix to a tokenized program, tracing every step.

    Debug-instrumented variant: each stage prints its intermediate values
    to standard output.

    Args:
        program: tokenized program text, split with ``get_lines``.
        fix: fix text; it is truncated with ``_truncate_fix`` and must then
            contain exactly one ``~``.
        kind: ``'replace'`` to rewrite the targeted line (via
            ``replace_ids``) or ``'insert'`` to insert the fix after it.
        check_literals: currently ignored for replacements - see the note
            in the body.

    Returns:
        The program rebuilt by ``recompose_program``.

    Raises:
        InvalidFixLocationException: the fix does not have two ``~`` parts,
            its line number cannot be extracted, or (replace) the line index
            is out of range.
        SubstitutionFailedException: (replace) identifier counts differ
            between the targeted line and the fix.
        AssertionError: ``kind`` is neither ``'replace'`` nor ``'insert'``.
    """
    stars = "*******************"

    def _dump(label, value):
        # Framed debug dump: stars, label, value, stars.
        print(stars)
        print(label)
        print(value)
        print(stars)

    print("apply_fix passed")
    # Break up program string into lines
    lines = get_lines(program)

    _dump("lines =", lines)
    print("lines length : %s" % (len(lines),))
    # Truncate the fix
    fix = _truncate_fix(fix)
    _dump("fix =", fix)
    parts = fix.split('~')
    print("fix.split('~') : %s" % (parts,))
    print("len(fix.split('~')) : %s" % (len(parts),))
    # Make sure there are two parts
    if len(parts) != 2:
        print("InvalidFixLocationExeption")
        print("can not split 2 part")
        raise InvalidFixLocationException
    print("Retrieve insertion location")
    # Retrieve insertion location
    try:
        print("if replace 1")
        if kind == 'replace':
            fix_location = extract_line_number(fix)
            print("kind == replace")
            _dump("fix_location =", fix_location)
        else:
            assert kind == 'insert'

            fix_tokens = fix.split()
            if fix_tokens[0] != '_<insertion>_':
                print("Warning: First token did not suggest insertion (should not happen)")

            # Skip the marker token and keep ALL remaining tokens.  The old
            # `[1]` kept a single token, so a line number written as several
            # digit tokens ("1 2 ~ ...") was read as its first digit only.
            fix_location = extract_line_number(' '.join(fix_tokens[1:]))
            _dump("fix_location ==", fix_location)
    except FailedToGetLineNumberException:
        raise InvalidFixLocationException
    print("Remove line number")
    # Remove line number
    fix = _remove_line_number(fix)

    _dump("fix =", fix)
    # Insert the fix
    if kind == 'replace':
        print("if replace 2")
        try:
            # NOTE(review): debug leftover that overrides the caller's
            # `check_literals`, making the literal check below unreachable.
            # Kept to preserve current behaviour; remove once debugging ends.
            check_literals = False  #debug
            if lines[fix_location].count('_<id>_') != fix.count('_<id>_'):
                print("not include original id")
                raise SubstitutionFailedException
            if check_literals:
                print("check literals")
                for lit in ['string', 'char', 'number']:
                    # Use the same marker on both sides; previously the
                    # program side was counted without the trailing '_'.
                    marker = '_<%s>_' % lit
                    if lines[fix_location].count(marker) != fix.count(marker):
                        print("not include original literal")
                        raise SubstitutionFailedException

            lines[fix_location] = replace_ids(fix, lines[fix_location])
        except IndexError:
            print("InvalidFixLocationException")
            raise InvalidFixLocationException
    else:
        assert kind == 'insert'
        lines.insert(fix_location + 1, fix)
    print("apply_fix end")
    return recompose_program(lines)
Exemple #15
0
def typo_mutate(mutator_obj,
                prog,
                max_num_mutations,
                num_mutated_progs,
                just_one=False):
    """Build (corrupted program, fix) pairs by mutating ``prog``.

    For each of ``num_mutated_progs`` rounds, a number of mutations between
    1 and ``max_num_mutations`` (drawn with ``mutator_obj.rng.choice``) is
    applied with ``mutator_obj.easy_mutate``; the 50th attempt in a round
    raises ``LoopCountThresholdExceededException``.  Rounds whose corrupted
    program contains a blank line are skipped.  Changed lines are visited
    in ascending order: each one that still differs is recorded, reported
    to ``mutator_obj.update_mutation_distribution`` and repaired with
    ``do_fix_at_line`` before moving on.  With ``just_one`` the round stops
    after its first recorded pair.  ``mutator_obj.update_pmf()`` is called
    at the end of every non-skipped round once at least one pair exists.

    Returns:
        A list of distinct ``(corrupted_program, fix)`` tuples, where
        ``fix`` is ``fetch_line(prog, line)``.
    """
    assert len(prog) > 10 and max_num_mutations > 0 and num_mutated_progs > 0, \
        "Invalid argument(s) supplied to the function token_mutate_series_network2"

    pairs = set()
    max_attempts = 50

    for _ in range(num_mutated_progs):
        if max_num_mutations > 1:
            num_mutations = mutator_obj.rng.choice(range(max_num_mutations)) + 1
        else:
            num_mutations = 1

        corrupted = prog
        changed = set()
        mutation_count = 0
        attempts = 0
        names_by_line = {}

        while mutation_count < num_mutations:
            attempts += 1
            if attempts == max_attempts:
                print("mutation_count", mutation_count)
                raise LoopCountThresholdExceededException

            # easy_mutate reports the number of the line it touched (or None)
            corrupted, line, mutation_name = mutator_obj.easy_mutate(corrupted)
            if line is None:
                continue

            expected = fetch_line(prog, line)
            actual = fetch_line(corrupted, line)
            if expected == actual:
                continue

            changed.add(line)
            mutation_count += 1
            names_by_line.setdefault(line, []).append(mutation_name)

        assert len(changed) > 0, "Could not mutate!"

        # Skip rounds that produced a blank line in the corrupted program.
        if any(text.strip() == '' for text in get_lines(corrupted)):
            continue

        for line in sorted(changed):
            fix = fetch_line(prog, line)
            corrupt_line = fetch_line(corrupted, line)
            assert len(fetch_line(prog, line, include_line_number=False).strip()) != 0, "empty fix"
            assert len(fetch_line(corrupted, line, include_line_number=False).strip()) != 0, "empty corrupted line"
            if fix == corrupt_line:
                continue

            pairs.add((corrupted, fix))
            mutator_obj.update_mutation_distribution(names_by_line[line])

            if just_one:
                break

            # Repair this line so the next pair shows one fewer error; an
            # IndexError from do_fix_at_line propagates to the caller.
            corrupted = do_fix_at_line(
                corrupted, line,
                fetch_line(prog, line, include_line_number=False))

        if pairs:
            mutator_obj.update_pmf()

    return list(pairs)