def which_fix_goes_first(program, fix1, fix2):
    """Return whichever of ``fix1`` / ``fix2`` has to be applied first.

    The line number of each fix is parsed from everything after its first
    whitespace-separated token.  A fix counts as "used" when
    ``fix_ids_are_in_program`` accepts the program text starting at the
    fix's own line.

    Decision order:
      1. If exactly one fix is used, that fix is returned.
      2. If neither is used, ``CouldNotFindUsesForEitherException`` is raised.
      3. Otherwise the fix with the smaller line number is returned.
      4. On a tie, the program is scanned from that line onward and the fix
         whose identifier token shows up first is returned.

    Raises:
        CouldNotFindUsesForEitherException: neither fix is used.
        AssertionError: a fix holds more than one ``_<id>_`` token before its
            first ``_<op>_[`` token, both fixes name the same identifier, or
            the tie-break scan finds neither identifier.
    """
    fix1_location = extract_line_number(' '.join(fix1.split()[1:]))
    fix2_location = extract_line_number(' '.join(fix2.split()[1:]))

    prog_lines = get_lines(program)

    # Each check used to be recomputed (including re-splitting the program)
    # in up to three separate conditions; evaluate each exactly once.
    fix2_used = fix_ids_are_in_program(
        recompose_program(prog_lines[fix2_location:]), fix2)
    fix1_used = fix_ids_are_in_program(
        recompose_program(prog_lines[fix1_location:]), fix1)

    if fix1_used and not fix2_used:
        return fix1
    if fix2_used and not fix1_used:
        return fix2
    if not fix1_used and not fix2_used:
        raise CouldNotFindUsesForEitherException

    if fix1_location < fix2_location:
        return fix1
    elif fix2_location < fix1_location:
        return fix2

    def leading_identifier(fix):
        # The single ``_<id>_`` token that appears before the first
        # ``_<op>_[`` token (or in the whole fix when there is none).
        found = None
        for token in fix.split():
            if '_<id>_' in token:
                assert found is None, fix
                found = token
            elif token == '_<op>_[':
                break
        return found

    id_in_fix1 = leading_identifier(fix1)
    id_in_fix2 = leading_identifier(fix2)

    assert id_in_fix1 != id_in_fix2, fix1 + ' & ' + fix2
    assert fix1_location == fix2_location

    # Same line: whichever identifier is encountered first wins.
    for i in range(fix1_location, len(prog_lines)):
        for token in prog_lines[i].split():
            if token == id_in_fix1:
                return fix1
            elif token == id_in_fix2:
                return fix2

    # Both fixes were reported as used, so one identifier should have been
    # found above.  The raise only matters when asserts are disabled (-O).
    assert False, 'unreachable code'
    raise CouldNotFindUsesForEitherException
def _sanitize_brackets(self, tokens_string):
    """Fold brace-only lines into the line above and drop empty lines.

    The program is split with ``get_lines``.  Walking from the last line to
    the first, every line that consists solely of one of the recognised
    brace sequences is appended (space separated) to the preceding line and
    blanked.  Blank lines are then removed and the remaining lines are
    passed to ``recompose_program``.

    Returns:
        The recomposed program, or ``''`` when the very first line is a
        brace-only line (there is no preceding line to merge it into).

    Raises:
        EmptyProgramException: ``get_lines`` returned exactly one line.
        AssertionError: a whitespace-only line survived the clean-up.
    """
    bracket_only_lines = (
        '_<op>_}',
        '_<op>_} _<op>_}',
        '_<op>_} _<op>_} _<op>_}',
        '_<op>_} _<op>_;',
        '_<op>_} _<op>_} _<op>_} _<op>_}',
        '_<op>_{',
        '_<op>_{ _<op>_{',
    )

    lines = get_lines(tokens_string)

    # NOTE(review): an earlier reader asked whether this should instead be
    # ``lines == ['']``; behaviour is left as it was.
    if len(lines) == 1:
        raise EmptyProgramException(tokens_string)

    # Bottom-up so that a run of brace-only lines collapses one by one into
    # the nearest line above; lines[i] is re-read on every step.
    for i in range(len(lines) - 1, -1, -1):
        stripped = lines[i].strip()
        if stripped not in bracket_only_lines:
            continue
        if i == 0:
            # can't handle this case!
            return ''
        lines[i - 1] += ' ' + stripped
        lines[i] = ''

    # Remove empty lines (in place, as before).
    lines[:] = [line for line in lines if line != '']

    # The previous check asserted on ``lines[i]`` with a stale index and so
    # only ever inspected one line; validate every remaining line.
    for line in lines:
        assert line.strip() != ''

    return recompose_program(lines)
def meets_criterion(incorrect_program_tokens, fix, name_dict, type_, name_seq=None, silent=True):
    """Tell whether ``fix`` is an acceptable edit for the given program.

    A fix is rejected (``False``) when, after truncation, it is a stop
    signal, its line number cannot be extracted, the line number lies past
    the end of the program, the number of ``_<id>_`` tokens differs from
    the targeted line, or - for ``type_ == 'replace'`` only - the sequence
    of keyword/type/APIcall/include tokens differs from the targeted line.

    ``name_dict`` and ``name_seq`` are accepted but not used here.  With
    ``silent=False`` the reason for the last two rejections is printed.
    """
    program_lines = get_lines(incorrect_program_tokens)
    fix = _truncate_fix(fix)

    if _is_stop_signal(fix):
        return False

    try:
        target_index = extract_line_number(fix)
    except Exception:
        return False

    if target_index >= len(program_lines):
        return False
    target_line = program_lines[target_index]

    # Make sure number of IDs is the same
    id_regex = r'_<id>_\w*'
    if len(re.findall(id_regex, target_line)) != len(re.findall(id_regex, fix)):
        if not silent:
            print('number of ids is not the same')
        return False

    # The keyword comparison applies to replacements only.
    if type_ != 'replace':
        return True

    keywords_regex = r'_<keyword>_\w+|_<type>_\w+|_<APIcall>_\w+|_<include>_\w+'
    if re.findall(keywords_regex, target_line) != re.findall(keywords_regex, fix):
        if not silent:
            print('important words (keywords, etc.) change drastically')
        return False

    return True
def meets_criterion(incorrect_program_tokens, fix, type_, silent=True):
    """Tell whether ``fix`` is an acceptable edit for the given program.

    Returns ``False`` when the truncated fix is a stop signal, when
    ``extract_line_number`` raises ``FailedToGetLineNumberException``, when
    the line number is beyond the last program line, when the count of
    ``_<id>_`` tokens differs between fix and targeted line, or - only for
    ``type_ == 'replace'`` - when their keyword/type/APIcall/include token
    sequences differ.  Otherwise returns ``True``.

    With ``silent=False`` the reason for the last two rejections is printed.
    """
    id_regex = r'_<id>_\w*'
    keywords_regex = r'_<keyword>_\w+|_<type>_\w+|_<APIcall>_\w+|_<include>_\w+'

    program_lines = get_lines(incorrect_program_tokens)
    fix = _truncate_fix(fix)

    if _is_stop_signal(fix):
        return False

    try:
        target_index = extract_line_number(fix)
    except FailedToGetLineNumberException:
        return False

    if target_index >= len(program_lines):
        return False
    target_line = program_lines[target_index]

    # Make sure number of IDs is the same
    ids_in_line = re.findall(id_regex, target_line)
    ids_in_fix = re.findall(id_regex, fix)
    if len(ids_in_line) != len(ids_in_fix):
        if not silent:
            print('number of ids is not the same')
        return False

    if type_ == 'replace':
        important_in_line = re.findall(keywords_regex, target_line)
        important_in_fix = re.findall(keywords_regex, fix)
        if important_in_line != important_in_fix:
            if not silent:
                print('important words (keywords, etc.) change drastically')
            return False

    return True
def add_fix_number(corrupted_prog, fix_number):
    """Append a ``FixNumber_<n>`` include directive as the last program line.

    The program is split with ``get_lines``; if that fails the offending
    program is printed and the exception is re-raised.  ``fix_number`` is
    formatted with ``%d``.  Returns the program rebuilt by
    ``recompose_program``.
    """
    try:
        program_lines = get_lines(corrupted_prog)
    except Exception:
        # Show the input that could not be split, then let the error through.
        print(corrupted_prog)
        raise

    marker_line = '_<directive>_#include _<include>_<FixNumber_%d>' % fix_number
    program_lines.append(marker_line)
    return recompose_program(program_lines)
def do_fix_at_line(corrupted_prog, line, fix):
    """Overwrite program line ``line`` with ``fix`` and rebuild the program.

    If ``get_lines`` fails, the program is printed and the exception is
    re-raised.  An out-of-range ``line`` raises ``IndexError``.
    """
    try:
        program_lines = get_lines(corrupted_prog)
    except Exception:
        print(corrupted_prog)
        raise

    # An IndexError here propagates to the caller unchanged.
    program_lines[line] = fix
    return recompose_program(program_lines)
def apply_fix(program, fix, kind='replace', check_literals=False):
    """Apply a predicted fix to a tokenized program.

    Args:
        program: program text understood by ``get_lines``.
        fix: fix string; after ``_truncate_fix`` it must contain exactly one
            ``~``.  For ``kind='insert'`` the first token is expected to be
            ``_<insertion>_`` and the line number follows it.
        kind: ``'replace'`` overwrites the addressed line (after mapping
            identifiers with ``replace_ids``); ``'insert'`` inserts the fix
            after the addressed line.
        check_literals: for replacements, also compare the number of
            string/char/number literal markers.

    Returns:
        The program rebuilt by ``recompose_program``.

    Raises:
        InvalidFixLocationException: the fix does not have two ``~``
            separated parts, its line number cannot be parsed, or (replace)
            the line does not exist.
        SubstitutionFailedException: identifier (or literal) counts differ
            between the fix and the line being replaced.
        AssertionError: ``kind`` is neither ``'replace'`` nor ``'insert'``.
    """
    # Break up program string into lines
    lines = get_lines(program)

    # Truncate the fix
    fix = _truncate_fix(fix)

    # Make sure there are two parts
    if len(fix.split('~')) != 2:
        raise InvalidFixLocationException

    # Retrieve insertion location
    try:
        if kind == 'replace':
            fix_location = extract_line_number(fix)
        else:
            assert kind == 'insert'
            fix_tokens = fix.split()

            if fix_tokens[0] != '_<insertion>_':
                print("Warning: First token did not suggest insertion (should not happen)")

            # Drop only the marker token.  Indexing a single token here
            # (``[1]``) kept just the first digit, so any insertion at
            # line 10 or later was applied at the wrong line.
            fix_location = extract_line_number(' '.join(fix_tokens[1:]))
    except FailedToGetLineNumberException:
        raise InvalidFixLocationException

    # Remove line number
    fix = _remove_line_number(fix)

    # Insert the fix
    if kind == 'replace':
        try:
            target = lines[fix_location]

            if target.count('_<id>_') != fix.count('_<id>_'):
                raise SubstitutionFailedException

            if check_literals:
                for lit in ['string', 'char', 'number']:
                    # NOTE(review): the two sides count slightly different
                    # markers ('_<lit>' vs '_<lit>_'); kept as found.
                    if target.count('_<%s>' % lit) != fix.count('_<%s>_' % lit):
                        raise SubstitutionFailedException

            lines[fix_location] = replace_ids(fix, target)
        except IndexError:
            raise InvalidFixLocationException
    else:
        assert kind == 'insert'
        lines.insert(fix_location + 1, fix)

    return recompose_program(lines)
def undeclare_variable(rng, program_string):
    """Remove one declaration from the program and describe how to undo it.

    Collects the distinct ``_<id>_`` tokens in first-seen order, lets
    ``find_declaration`` pick and cut a declaration out of the program
    lines, and asks ``insert_fix`` for the line the declaration should be
    re-inserted at.

    Returns:
        ``(program, fix, fix_line)`` where ``program`` is the recomposed
        program, ``fix`` is ``'_<insertion>_ <digits> ~ <declaration>'``
        (the digits of ``fix_line`` separated by spaces) and ``fix_line``
        is the value returned by ``insert_fix``.
    """
    # Lines
    orig_lines = get_lines(program_string)

    # Variables
    variables = []
    for token in program_string.split():
        if '_<id>_' not in token or token in variables:
            continue
        variables.append(token)

    # Look for a declaration
    every_line = list(range(len(orig_lines)))
    declaration, declaration_pos = find_declaration(
        rng, variables, every_line, orig_lines)

    # Find the function signature
    fix_line = insert_fix(declaration_pos, orig_lines)
    spaced_digits = ' '.join(str(fix_line))
    fix = '_<insertion>_ {} ~ {}'.format(spaced_digits, declaration)

    # Drop the declaration's line when it is blank at this point.
    if not orig_lines[declaration_pos].strip():
        del orig_lines[declaration_pos]

    return recompose_program(orig_lines), fix, fix_line
def do_fix_at_line(corrupted_prog, line, fix):
    """Overwrite program line ``line`` with ``fix`` and rebuild the program.

    ``fix`` may carry a ``<line number> ~ `` prefix; when the `` ~ ``
    separator is present, only the text after the first separator is used
    (stripped).  A fix without that separator is written as given.

    If ``get_lines`` fails, the program is printed and the exception is
    re-raised.  An out-of-range ``line`` raises ``IndexError``.
    """
    try:
        lines = get_lines(corrupted_prog)
    except Exception:
        print(corrupted_prog)
        raise

    # Test for the exact separator that is split on.  The previous test
    # ('~' anywhere in the fix) also fired for prefix-less lines containing
    # a ``_<op>_~`` token, for which the split yields a single element and
    # indexing [1] raised IndexError.
    if ' ~ ' in fix:
        fix = fix.split(' ~ ')[1].strip()

    lines[line] = fix
    return recompose_program(lines)
def token_mutate_for_tsne_with_specific_errors(prog, num_mutations, action, include_kind=False):
    """Corrupt ``prog`` with ``num_mutations`` mutations of one ``action``.

    ``mutator_obj.specific_mutate`` is called repeatedly; a call counts as a
    mutation when the line it reports differs between ``prog`` and the
    corrupted program.  On the 50th call ``mutation_count`` is printed and
    ``LoopCountThresholdExceededException`` is raised.

    Returns:
        ``[]`` when the corrupted program contains a blank line or no
        mutated line still differs; otherwise a one-element list
        ``[(corrupted_program, fix)]`` for the lowest differing line, where
        ``fix`` is that line of ``prog`` as returned by ``fetch_line``.
    """
    assert num_mutations > 0, "Invalid argument(s) supplied to the function token_mutate"
    global mutator_obj

    specific_mutate = mutator_obj.specific_mutate
    attempt_limit = 50

    corrupted = prog
    touched = set()
    kinds_by_line = {}  # collected when include_kind is set; not returned
    successes = 0
    attempts = 0

    while successes < num_mutations:
        attempts += 1
        if attempts == attempt_limit:
            print("mutation_count %s" % (successes,))
            raise LoopCountThresholdExceededException

        if include_kind:
            corrupted, _, line, kind = specific_mutate(
                prog, corrupted, action, include_kind=True)
        else:
            corrupted, _, line = specific_mutate(prog, corrupted, action)

        if line is None:
            continue
        if fetch_line(prog, line) == fetch_line(corrupted, line):
            continue

        touched.add(line)
        successes += 1
        if include_kind:
            kinds_by_line.setdefault(str(line), []).append(kind)

    assert len(touched) > 0, "Could not mutate!"

    # A corrupted program containing a blank line is discarded.
    for candidate in get_lines(corrupted):
        if candidate.strip() == '':
            return []

    for line in sorted(touched):
        fix = fetch_line(prog, line)
        corrupt_line = fetch_line(corrupted, line)
        assert len(fetch_line(prog, line, include_line_number=False).strip()) != 0, "empty fix"
        assert len(fetch_line(corrupted, line, include_line_number=False).strip()) != 0, "empty corrupted line"

        if fix != corrupt_line:
            # Only the first differing line is reported.
            return [(corrupted, fix)]

    return []
def token_mutate_series_any_fix(prog, max_num_mutations, num_mutated_progs, include_kind=False):
    """Build (corrupted program, fix) training pairs in random repair order.

    For each of ``num_mutated_progs`` rounds, between 1 and
    ``max_num_mutations`` mutations are applied with
    ``mutator_obj.easy_mutate2``.  On the 50th call within a round
    ``mutation_count`` is printed and
    ``LoopCountThresholdExceededException`` is raised.  Rounds whose
    corrupted program contains a blank line are skipped.

    The mutated lines are ranked by line number (1-based), then visited in
    shuffled order; each line that still differs yields a pair and is then
    repaired in the corrupted program before the next line is visited.

    Returns:
        A list of ``(program, fix)`` tuples, or ``(program, fix, kind)``
        when ``include_kind`` is set, where ``program`` has the line's rank
        appended via ``add_fix_number``.  With ``include_kind`` only lines
        mutated exactly once contribute a pair.
    """
    assert max_num_mutations > 0 and num_mutated_progs > 0, "Invalid argument(s) supplied to the function token_mutate"
    global mutator_obj

    attempt_limit = 50
    collected = set()

    for _ in range(num_mutated_progs):
        num_mutations = random.choice(range(max_num_mutations)) + 1
        corrupted = prog
        touched = set()
        kinds_by_line = {}
        successes = 0
        attempts = 0

        while successes < num_mutations:
            attempts += 1
            if attempts == attempt_limit:
                print("mutation_count %s" % (successes,))
                raise LoopCountThresholdExceededException

            if include_kind:
                corrupted, _, line, kind = mutator_obj.easy_mutate2(
                    prog, corrupted, include_kind=True)
            else:
                corrupted, _, line = mutator_obj.easy_mutate2(prog, corrupted)

            if line is None:
                continue
            if fetch_line(prog, line) == fetch_line(corrupted, line):
                continue

            touched.add(line)
            successes += 1
            if include_kind:
                kinds_by_line.setdefault(str(line), []).append(kind)

        assert len(touched) > 0, "Could not mutate!"

        has_blank_line = False
        for candidate in get_lines(corrupted):
            if candidate.strip() == '':
                has_blank_line = True
                break
        if has_blank_line:
            continue

        ordered = sorted(touched)
        ranked = [(number, rank) for rank, number in enumerate(ordered, 1)]
        random.shuffle(ranked)
        # The second shuffle's result is unused, but it advances the shared
        # random state exactly as before.
        random.shuffle(ordered)

        for line, fix_number in ranked:
            fix = fetch_line(prog, line)
            corrupt_line = fetch_line(corrupted, line)
            assert len(fetch_line(prog, line, include_line_number=False).strip()) != 0, "empty fix"
            assert len(fetch_line(corrupted, line, include_line_number=False).strip()) != 0, "empty corrupted line"

            if fix == corrupt_line:
                continue

            if include_kind:
                line_kinds = kinds_by_line[str(line)]
                if len(line_kinds) == 1:  # remove later
                    for kind in line_kinds:
                        collected.add((corrupted, fix, fix_number, kind))
            else:
                collected.add((corrupted, fix, fix_number))

            # Repair this line so later pairs start from the partly fixed
            # program.
            corrupted = do_fix_at_line(
                corrupted, line,
                fetch_line(prog, line, include_line_number=False))

    if include_kind:
        return [(add_fix_number(broken, number), fix, kind)
                for broken, fix, number, kind in list(collected)]
    return [(add_fix_number(broken, number), fix)
            for broken, fix, number in list(collected)]
def undeclare_variable(rng, old_program, program_string, deleted_ids, name_dict=None, print_debug_messages=False):
    """Remove one local variable declaration and build the fix restoring it.

    Candidate lines are those of ``program_string`` that are neither inside
    a struct/union/enum body nor at brace depth zero.  Identifiers are tried
    in ``rng``-shuffled order and candidate lines in ``rng``-shuffled order;
    the first line holding exactly one match of one of four declaration
    shapes (initialised single declaration, plain single declaration,
    non-leading member of a declaration list, leading member of a
    declaration list) is mutated.

    The fix is inserted at the first line, at or after the nearest function
    signature above the declaration in ``old_program``, that contains an
    opening brace.

    Args:
        rng: object providing ``shuffle``.
        old_program: reference program; must have as many lines as
            ``program_string``.
        program_string: program to mutate.
        deleted_ids: accepted but not used here.
        name_dict: optional mapping used only for debug output.
        print_debug_messages: print progress information.

    Returns:
        ``(program, fix, fix_line)``: the mutated program (each line as
        ``<spaced digits> ~ <tokens> ``), the fix
        ``'_<insertion>_ <spaced digits> ~ <declaration>'`` and the integer
        insertion line.

    Raises:
        NothingToMutateException: no declaration matched.
        FailedToMutateException: no function signature, or no opening brace
            after it, was found.
    """
    # Only needed for debug output; stays None without a name_dict so that
    # debug printing no longer fails with a NameError.
    rev_name_dict = get_rev_dict(name_dict) if name_dict is not None else None

    # Lines
    orig_lines = get_lines(program_string)
    old_lines = get_lines(old_program)

    open_brace = r'_<op>_\{'
    close_brace = r'_<op>_\}'

    def debug_match(label, pattern, index):
        if print_debug_messages:
            print("On line %d: %s" % (
                index,
                tokens_to_source(orig_lines[index], name_dict, clang_format=True)))
            print("%s %s" % (label, re.findall(pattern, orig_lines[index])))

    # Lines to ignore: bodies of struct/union/enum definitions.  ``\d+``
    # (not ``\d``) so that identifiers numbered 10 and above are recognised,
    # matching every other regex in this function.
    struct_header = r'_<keyword>_(?:struct|union|enum) _<id>_\d+@ _<op>_\{'
    struct_lines = []
    structs_deep = 0
    for i, line in enumerate(orig_lines):
        if re.search(struct_header, line):
            structs_deep += len(re.findall(open_brace, line))
        elif structs_deep > 0:
            structs_deep += len(re.findall(open_brace, line))
            structs_deep -= len(re.findall(close_brace, line))
            assert structs_deep >= 0, str(structs_deep) + " " + line
            struct_lines.append(i)

    # Lines to ignore: brace-free lines at depth zero (global scope).
    global_lines = []
    brackets_deep = 0
    for i, line in enumerate(orig_lines):
        opened = len(re.findall(open_brace, line))
        closed = len(re.findall(close_brace, line))
        if opened > 0 or closed > 0:
            brackets_deep += opened
            brackets_deep -= closed
            assert brackets_deep >= 0, str(brackets_deep) + " " + line
        elif brackets_deep == 0:
            global_lines.append(i)

    if print_debug_messages:
        print('Ignoring lines: %s' % (struct_lines,))
        print('Ignoring lines: %s' % (global_lines,))
        for line in sorted(set(struct_lines + global_lines)):
            print("- %s" % (orig_lines[line],))

    # Variables, distinct, in first-seen order
    variables = []
    for token in program_string.split():
        if '_<id>_' in token and token not in variables:
            variables.append(token)

    assert len(orig_lines) == len(old_lines)

    # Look for a declaration
    done = False
    declaration = None
    declaration_pos = None
    rng.shuffle(variables)

    for to_undeclare in variables:
        if print_debug_messages:
            shown = to_undeclare if rev_name_dict is None else rev_name_dict[to_undeclare]
            print('Looking for: %s ...' % (shown,))

        # Find a location (scope) to undeclare it from
        shuffled_lines = list(
            set(range(len(orig_lines))) - set(struct_lines + global_lines))
        rng.shuffle(shuffled_lines)

        regex_alone_use = r'(_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)((?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*)(?: _<op>_= [^,;]+)(?: _<op>_;)' % to_undeclare
        regex_alone = r'((?:_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])* _<op>_;)' % to_undeclare
        regex_group_leader = r'((?:_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)*)( %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*)(?: _<op>_= [^,;]+)?( _<op>_,)(?:(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)? _<op>_,)*(?:(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)? _<op>_;)' % to_undeclare
        regex_group = r'(_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?(?: _<op>_,(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)*( _<op>_,(?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)(?: _<op>_,(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)*(?: _<op>_;)' % to_undeclare

        for i in shuffled_lines:
            text = orig_lines[i]

            if len(re.findall(regex_alone_use, text)) == 1:
                # "type x = value ;": keep the assignment, drop the type.
                debug_match("Found Alone use", regex_alone_use, i)
                m = re.search(regex_alone_use, text)
                declaration = text[m.start(1):m.end(2)] + ' _<op>_;'
                orig_lines[i] = text[:m.start(1)] + text[m.end(1) + 1:]

            elif len(re.findall(regex_alone, text)) == 1:
                # "type x ;": drop the whole declaration.
                debug_match("Found Alone", regex_alone, i)
                m = re.search(regex_alone, text)
                declaration = text[m.start(1):m.end(1)]
                orig_lines[i] = text[:m.start(1)] + text[m.end(1) + 1:]

            elif len(re.findall(regex_group, text)) == 1:
                # "type a , x , b ;": drop ", x"; [8:] skips ' _<op>_,'.
                debug_match("Found Group", regex_group, i)
                m = re.search(regex_group, text)
                declaration = (text[m.start(1):m.end(1)]
                               + text[m.start(2):m.end(2)][8:] + ' _<op>_;')
                # The restored declaration carries no initialiser.
                if '_<op>_=' in declaration:
                    declaration = declaration[:declaration.index('_<op>_=')]
                orig_lines[i] = text[:m.start(2) + 1] + text[m.end(2) + 1:]

            elif len(re.findall(regex_group_leader, text)) == 1:
                # "type x , a ;": drop "x ," and keep the type for the rest.
                debug_match("Found Group Leader", regex_group_leader, i)
                m = re.search(regex_group_leader, text)
                declaration = text[m.start(1):m.end(2)] + ' _<op>_;'
                orig_lines[i] = text[:m.start(2) + 1] + text[m.end(3) + 1:]

            else:
                continue

            declaration_pos = i
            done = True
            break

        if done:
            break

    if not done:
        # Failed to find something to undeclare
        raise NothingToMutateException

    # Find the function signature
    fn_regex = r'(?:_<keyword>_(?:struct|union|enum) _<id>_\d+@|_<type>_\w+|_<keyword>_void)(?: _<op>_\*)* (?:_<id>_\d+@|_<APIcall>_main) _<op>_\('
    fn_start_regex = open_brace

    assert declaration_pos is not None

    fix_line = None
    signature_found = False
    # Scan upwards down to and including line 0 (the previous bound of 0
    # skipped the first line of the program).
    for i in range(declaration_pos, -1, -1):
        if len(re.findall(fn_regex, old_lines[i])) != 1:
            continue
        signature_found = True
        # First line at/after the signature that opens the body.  This used
        # to test old_lines[i] on every step, so a brace on a later line was
        # never found.
        for j in range(i, len(old_lines)):
            if re.search(fn_start_regex, old_lines[j]):
                fix_line = j
                break
        break

    if not signature_found:
        # Failed to insert fix
        raise FailedToMutateException

    if fix_line is None:
        # Couldn't find { after function definition
        raise FailedToMutateException

    fix = '_<insertion>_ ' + ' '.join(str(fix_line)) + ' ~ ' + declaration

    # Drop the declaration's line if nothing is left on it.
    if orig_lines[declaration_pos].strip() == '':
        del orig_lines[declaration_pos]

    recomposed_program = ''.join(
        ' '.join(str(i)) + ' ~ ' + line + ' '
        for i, line in enumerate(orig_lines))

    return recomposed_program, fix, fix_line
def undeclare_variable(rng, old_program, program_string):
    """Remove one local variable declaration and build the fix restoring it.

    Candidate lines are those of ``program_string`` that are neither inside
    a struct/union/enum body nor at brace depth zero.  Identifiers are tried
    in ``rng``-shuffled order and candidate lines in ``rng``-shuffled order;
    the first line holding exactly one match of one of four declaration
    shapes (initialised single declaration, plain single declaration,
    non-leading member of a declaration list, leading member of a
    declaration list) is mutated, and the search stops there.

    The fix is inserted at the first line, at or after the nearest function
    signature above the declaration in ``old_program``, that contains an
    opening brace.

    Returns:
        ``(program, fix, fix_line)``: the mutated program (each line as
        ``<spaced digits> ~ <tokens> ``), the fix
        ``'_<insertion>_ <spaced digits> ~ <declaration>'`` and the integer
        insertion line.

    Raises:
        NothingToMutateException: no declaration matched.
        FailedToMutateException: no function signature, or no opening brace
            after it, was found.
    """
    # Lines
    orig_lines = get_lines(program_string)
    old_lines = get_lines(old_program)

    open_brace = r'_<op>_\{'
    close_brace = r'_<op>_\}'

    # Lines to ignore: bodies of struct/union/enum definitions.  ``\d+``
    # (not ``\d``) so that identifiers numbered 10 and above are recognised,
    # matching every other regex in this function.
    struct_header = r'_<keyword>_(?:struct|union|enum) _<id>_\d+@ _<op>_\{'
    struct_lines = []
    structs_deep = 0
    for i, line in enumerate(orig_lines):
        if re.search(struct_header, line):
            structs_deep += len(re.findall(open_brace, line))
        elif structs_deep > 0:
            structs_deep += len(re.findall(open_brace, line))
            structs_deep -= len(re.findall(close_brace, line))
            assert structs_deep >= 0, str(structs_deep) + " " + line
            struct_lines.append(i)

    # Lines to ignore: brace-free lines at depth zero (global scope).
    global_lines = []
    brackets_deep = 0
    for i, line in enumerate(orig_lines):
        opened = len(re.findall(open_brace, line))
        closed = len(re.findall(close_brace, line))
        if opened > 0 or closed > 0:
            brackets_deep += opened
            brackets_deep -= closed
            assert brackets_deep >= 0, str(brackets_deep) + " " + line
        elif brackets_deep == 0:
            global_lines.append(i)

    # Variables, distinct, in first-seen order
    variables = []
    for token in program_string.split():
        if '_<id>_' in token and token not in variables:
            variables.append(token)

    # Look for a declaration
    done = False
    declaration = None
    declaration_pos = None
    rng.shuffle(variables)

    for to_undeclare in variables:
        # Find a location (scope) to undeclare it from
        shuffled_lines = list(
            set(range(len(orig_lines))) - set(struct_lines + global_lines))
        rng.shuffle(shuffled_lines)

        # NOTE(review): const-qualified and typedef'd declarations are not
        # covered by these patterns.
        regex_alone_use = r'(_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)((?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*)(?: _<op>_= [^,;]+)(?: _<op>_;)' % to_undeclare
        regex_alone = r'((?:_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])* _<op>_;)' % to_undeclare
        regex_group_leader = r'((?:_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)*)( %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*)(?: _<op>_= [^,;]+)?( _<op>_,)(?:(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)? _<op>_,)*(?:(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)? _<op>_;)' % to_undeclare
        regex_group = r'(_<keyword>_(?:struct|enum|union) _<id>_\d+@|_<type>_\w+)(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?(?: _<op>_,(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)*( _<op>_,(?: _<op>_\*)* %s(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)(?: _<op>_,(?: _<op>_\*)* _<id>_\d+@(?: _<op>_\[(?: [^\]]+)? _<op>_\])*(?: _<op>_= [^,;]+)?)*(?: _<op>_;)' % to_undeclare

        for i in shuffled_lines:
            text = orig_lines[i]

            if len(re.findall(regex_alone_use, text)) == 1:
                # "type x = value ;": keep the assignment, drop the type.
                m = re.search(regex_alone_use, text)
                declaration = text[m.start(1):m.end(2)] + ' _<op>_;'
                orig_lines[i] = text[:m.start(1)] + text[m.end(1) + 1:]

            elif len(re.findall(regex_alone, text)) == 1:
                # "type x ;": drop the whole declaration.
                m = re.search(regex_alone, text)
                declaration = text[m.start(1):m.end(1)]
                orig_lines[i] = text[:m.start(1)] + text[m.end(1) + 1:]

            elif len(re.findall(regex_group, text)) == 1:
                # "type a , x , b ;": drop ", x"; [8:] skips ' _<op>_,'.
                m = re.search(regex_group, text)
                declaration = (text[m.start(1):m.end(1)]
                               + text[m.start(2):m.end(2)][8:] + ' _<op>_;')
                # The restored declaration carries no initialiser.
                if '_<op>_=' in declaration:
                    declaration = declaration[:declaration.index('_<op>_=')]
                orig_lines[i] = text[:m.start(2) + 1] + text[m.end(2) + 1:]

            elif len(re.findall(regex_group_leader, text)) == 1:
                # "type x , a ;": drop "x ," and keep the type for the rest.
                m = re.search(regex_group_leader, text)
                declaration = text[m.start(1):m.end(2)] + ' _<op>_;'
                orig_lines[i] = text[:m.start(2) + 1] + text[m.end(3) + 1:]

            else:
                continue

            declaration_pos = i
            done = True
            break

        # Stop after the first successful mutation; carrying on with the
        # remaining identifiers would mutate further lines and discard the
        # declaration just recorded.
        if done:
            break

    if not done:
        # Failed to find something to undeclare
        raise NothingToMutateException

    # Find the function signature
    fn_regex = r'(?:_<keyword>_(?:struct|union|enum) _<id>_\d+@|_<type>_\w+|_<keyword>_void)(?: _<op>_\*)* (?:_<id>_\d+@|_<APIcall>_main) _<op>_\('
    fn_start_regex = open_brace

    assert declaration_pos is not None

    fix_line = None
    signature_found = False
    # Scan upwards down to and including line 0 (the previous bound of 0
    # skipped the first line of the program).
    for i in range(declaration_pos, -1, -1):
        if len(re.findall(fn_regex, old_lines[i])) != 1:
            continue
        signature_found = True
        # First line at/after the signature that opens the body.  This used
        # to test old_lines[i] on every step, so a brace on a later line was
        # never found.
        for j in range(i, len(old_lines)):
            if re.search(fn_start_regex, old_lines[j]):
                fix_line = j
                break
        break

    # NOTE(review): re-inserting at the top of the function may not restore
    # the original meaning when the removed declaration depended on earlier
    # statements, e.g. "int x = 0;" followed by "int y = x;".
    if not signature_found:
        # Failed to insert fix
        raise FailedToMutateException

    if fix_line is None:
        # Couldn't find { after function definition
        raise FailedToMutateException

    fix = '_<insertion>_ ' + ' '.join(str(fix_line)) + ' ~ ' + declaration

    # Drop the declaration's line if nothing is left on it.
    if orig_lines[declaration_pos].strip() == '':
        del orig_lines[declaration_pos]

    recomposed_program = ''.join(
        ' '.join(str(i)) + ' ~ ' + line + ' '
        for i, line in enumerate(orig_lines))

    return recomposed_program, fix, fix_line
def apply_fix(program, fix, kind='replace', check_literals=False):
    """Apply a predicted fix to a tokenized program, tracing each step.

    Args:
        program: program text understood by ``get_lines``.
        fix: fix string; after ``_truncate_fix`` it must contain exactly one
            ``~``.  For ``kind='insert'`` the first token is expected to be
            ``_<insertion>_`` and the line number follows it.
        kind: ``'replace'`` overwrites the addressed line (after mapping
            identifiers with ``replace_ids``); ``'insert'`` inserts the fix
            after the addressed line.
        check_literals: for replacements, also compare the number of
            string/char/number literal markers.

    Returns:
        The program rebuilt by ``recompose_program``.

    Raises:
        InvalidFixLocationException: the fix does not have two ``~``
            separated parts, its line number cannot be parsed, or (replace)
            the line does not exist.
        SubstitutionFailedException: identifier (or literal) counts differ
            between the fix and the line being replaced.
        AssertionError: ``kind`` is neither ``'replace'`` nor ``'insert'``.

    Progress and intermediate values are printed to standard output.
    """
    banner = "*******************"

    def trace_value(label, value):
        print(banner)
        print(label)
        print(value)
        print(banner)

    print("apply_fix passed")

    # Break up program string into lines
    lines = get_lines(program)
    trace_value("lines =", lines)
    print("lines length : %s" % (len(lines),))

    # Truncate the fix
    fix = _truncate_fix(fix)
    trace_value("fix =", fix)

    parts = fix.split('~')
    print("fix.split('~') : %s" % (parts,))
    print("len(fix.split('~')) : %s" % (len(parts),))

    # Make sure there are two parts
    if len(parts) != 2:
        print("InvalidFixLocationExeption")
        print("can not split 2 part")
        raise InvalidFixLocationException

    print("Retrieve insertion location")

    # Retrieve insertion location
    try:
        print("if replace 1")
        if kind == 'replace':
            fix_location = extract_line_number(fix)
            print("kind == replace")
            trace_value("fix_location =", fix_location)
        else:
            assert kind == 'insert'
            fix_tokens = fix.split()

            if fix_tokens[0] != '_<insertion>_':
                print("Warning: First token did not suggest insertion (should not happen)")

            # Drop only the marker token.  Indexing a single token here
            # (``[1]``) kept just the first digit, so any insertion at
            # line 10 or later was applied at the wrong line.
            fix_location = extract_line_number(' '.join(fix_tokens[1:]))
            trace_value("fix_location ==", fix_location)
    except FailedToGetLineNumberException:
        raise InvalidFixLocationException

    print("Remove line number")

    # Remove line number
    fix = _remove_line_number(fix)
    trace_value("fix =", fix)

    # Insert the fix
    if kind == 'replace':
        print("if replace 2")
        try:
            # A leftover debugging override used to force check_literals to
            # False at this point; the caller's value is honoured again.
            target = lines[fix_location]

            if target.count('_<id>_') != fix.count('_<id>_'):
                print("not include original id")
                raise SubstitutionFailedException

            if check_literals:
                print("check literals")
                for lit in ['string', 'char', 'number']:
                    # NOTE(review): the two sides count slightly different
                    # markers ('_<lit>' vs '_<lit>_'); kept as found.
                    if target.count('_<%s>' % lit) != fix.count('_<%s>_' % lit):
                        print("not include original literal")
                        raise SubstitutionFailedException

            lines[fix_location] = replace_ids(fix, target)
        except IndexError:
            print("InvalidFixLocationException")
            raise InvalidFixLocationException
    else:
        assert kind == 'insert'
        lines.insert(fix_location + 1, fix)

    print("apply_fix end")
    return recompose_program(lines)
def typo_mutate(mutator_obj, prog, max_num_mutations, num_mutated_progs, just_one=False):
    """Build (corrupted program, fix) pairs by injecting typo mutations.

    For each of ``num_mutated_progs`` rounds, a number of mutations between
    1 and ``max_num_mutations`` (drawn from ``mutator_obj.rng``) is applied
    with ``mutator_obj.easy_mutate``.  On the 50th call within a round the
    mutation count is printed and ``LoopCountThresholdExceededException``
    is raised.  Rounds whose corrupted program contains a blank line are
    skipped.

    Mutated lines are visited in ascending order.  Each line that still
    differs yields a pair, is reported to
    ``mutator_obj.update_mutation_distribution`` and is then repaired in
    the corrupted program; with ``just_one`` the round ends after the first
    pair.  ``mutator_obj.update_pmf()`` is called if any pair was produced.

    Returns:
        A list of ``(corrupted_program, fix)`` tuples.
    """
    assert len(prog) > 10 and max_num_mutations > 0 and num_mutated_progs > 0, "Invalid argument(s) supplied to the function token_mutate_series_network2"

    attempt_limit = 50
    collected = set()

    for _ in range(num_mutated_progs):
        if max_num_mutations > 1:
            num_mutations = mutator_obj.rng.choice(range(max_num_mutations)) + 1
        else:
            num_mutations = 1

        corrupted = prog
        touched = set()
        mutations = {}
        successes = 0
        attempts = 0

        while successes < num_mutations:
            attempts += 1
            if attempts == attempt_limit:
                print("mutation_count", successes)
                raise LoopCountThresholdExceededException

            # line is line_number here!
            corrupted, line, mutation_name = mutator_obj.easy_mutate(corrupted)
            if line is None:
                continue
            if fetch_line(prog, line) == fetch_line(corrupted, line):
                continue

            touched.add(line)
            successes += 1
            mutations.setdefault(line, []).append(mutation_name)

        assert len(touched) > 0, "Could not mutate!"

        has_blank_line = False
        for candidate in get_lines(corrupted):
            if candidate.strip() == '':
                has_blank_line = True
                break
        if has_blank_line:
            continue

        for line in sorted(touched):
            fix = fetch_line(prog, line)
            corrupt_line = fetch_line(corrupted, line)
            assert len(fetch_line(prog, line, include_line_number=False).strip()) != 0, "empty fix"
            assert len(fetch_line(corrupted, line, include_line_number=False).strip()) != 0, "empty corrupted line"

            if fix == corrupt_line:
                continue

            collected.add((corrupted, fix))
            mutator_obj.update_mutation_distribution(mutations[line])
            if just_one:
                break

            # Repair this line so the next pair starts from the partly
            # fixed program.
            corrupted = do_fix_at_line(
                corrupted, line,
                fetch_line(prog, line, include_line_number=False))

    if len(collected) > 0:
        mutator_obj.update_pmf()

    return list(collected)