def token_mutate_series_any_fix(prog, max_num_mutations, num_mutated_progs, include_kind=False):
    """Build (corrupted program, fix) pairs by mutating ``prog`` and undoing the mutations.

    Each of the ``num_mutated_progs`` rounds works as follows:

    1. A mutation count is drawn uniformly from ``1..max_num_mutations``.
    2. ``mutator_obj.easy_mutate2`` is called until that many calls have
       left the reported line different from the same line of ``prog``.
    3. The round is dropped if the corrupted program contains a line that
       is empty after stripping.
    4. The mutated lines are repaired one at a time in random order.  Just
       before each repair, the current corrupted program, the original
       line (as returned by ``fetch_line(prog, line)``) and the line's
       1-based rank among the sorted mutated lines are recorded.

    Args:
        prog: Original program; passed unchanged to ``easy_mutate2`` and
            ``fetch_line``.
        max_num_mutations: Inclusive upper bound on mutations per round
            (must be > 0).
        num_mutated_progs: Number of rounds (must be > 0).
        include_kind: When true, ``easy_mutate2`` is called with
            ``include_kind=True`` and the reported kind is returned with
            every pair.

    Returns:
        A list of ``(add_fix_number(corrupted, fix_number), fix)`` tuples, or
        ``(add_fix_number(corrupted, fix_number), fix, kind)`` tuples when
        ``include_kind`` is true.  Pairs are de-duplicated through a set,
        so their order is unspecified.

    Raises:
        AssertionError: On invalid arguments, or if a recorded line is
            empty in ``prog`` or in the corrupted program.
        LoopCountThresholdExceededException: If the 50th mutation attempt
            of a round is reached without enough effective mutations.
    """
    assert max_num_mutations > 0 and num_mutated_progs > 0, \
        "Invalid argument(s) supplied to the function token_mutate_series_any_fix"

    loop_count_threshold = 50
    corrupt_fix_pair = set()

    for _ in range(num_mutated_progs):
        num_mutations = random.choice(range(max_num_mutations)) + 1
        this_corrupted = prog
        lines = set()
        fix_kinds = {}  # str(line) -> kinds of every mutation applied to it
        mutation_count = 0
        loop_counter = 0

        while mutation_count < num_mutations:
            loop_counter += 1
            if loop_counter == loop_count_threshold:
                # Single pre-formatted argument: prints the same text on
                # Python 2 and Python 3.
                print("mutation_count %s" % (mutation_count,))
                raise LoopCountThresholdExceededException

            if include_kind:
                this_corrupted, _fix, line, kind = mutator_obj.easy_mutate2(
                    prog, this_corrupted, include_kind=True)
            else:
                this_corrupted, _fix, line = mutator_obj.easy_mutate2(
                    prog, this_corrupted)

            if line is None:
                continue

            # Only count the attempt if the line really differs from the original.
            if fetch_line(prog, line) != fetch_line(this_corrupted, line):
                lines.add(line)
                mutation_count += 1
                if include_kind:
                    fix_kinds.setdefault(str(line), []).append(kind)

        assert len(lines) > 0, "Could not mutate!"

        # Discard corrupted programs that ended up with a blank line.
        if any(text.strip() == '' for text in get_lines(this_corrupted)):
            continue

        lines = sorted(lines)
        # Rank = 1-based position of the line among the sorted mutated lines.
        ranked_lines = [(line, rank) for rank, line in enumerate(lines, 1)]
        random.shuffle(ranked_lines)
        # The shuffled `lines` is not used afterwards; the call is kept so the
        # amount of global RNG state consumed per round stays the same.
        random.shuffle(lines)

        for line, fix_number in ranked_lines:
            fix = fetch_line(prog, line)
            corrupt_line = fetch_line(this_corrupted, line)
            assert len(fetch_line(prog, line, include_line_number=False).strip()) != 0, "empty fix"
            assert len(fetch_line(this_corrupted, line, include_line_number=False).strip()) != 0, "empty corrupted line"

            if fix == corrupt_line:
                continue

            if include_kind:
                kinds = fix_kinds[str(line)]
                # Lines mutated more than once yield no pair (but are still
                # repaired below).
                if len(kinds) == 1:  # remove later
                    corrupt_fix_pair.add((this_corrupted, fix, fix_number, kinds[0]))
            else:
                corrupt_fix_pair.add((this_corrupted, fix, fix_number))

            # Repair this line so the next pair shows one mutation fewer.
            this_corrupted = do_fix_at_line(
                this_corrupted, line,
                fetch_line(prog, line, include_line_number=False))

    if include_kind:
        return [(add_fix_number(corrupted, fix_number), fix, kind)
                for corrupted, fix, fix_number, kind in corrupt_fix_pair]
    return [(add_fix_number(corrupted, fix_number), fix)
            for corrupted, fix, fix_number in corrupt_fix_pair]
def token_mutate_for_tsne_with_specific_errors(prog, num_mutations, action, include_kind=False):
    """Corrupt ``prog`` with ``num_mutations`` mutations of one ``action`` and return one pair.

    ``mutator_obj.specific_mutate`` is called until ``num_mutations`` calls
    have left the reported line different from the same line of ``prog``.
    The result pairs the fully corrupted program with the original text of
    the lowest mutated line that still differs.

    Args:
        prog: Original program; passed unchanged to ``specific_mutate`` and
            ``fetch_line``.
        num_mutations: Number of effective mutations to apply (must be > 0).
        action: Forwarded to ``specific_mutate`` unchanged.
        include_kind: When true, ``specific_mutate`` is called with
            ``include_kind=True``.  The reported kind is not part of the
            return value.

    Returns:
        ``[(corrupted_program, fix)]`` with a single element, or ``[]`` when
        the corrupted program contains a line that is empty after stripping
        or no mutated line differs from the original any more.

    Raises:
        AssertionError: On invalid arguments, or if an examined line is
            empty in ``prog`` or in the corrupted program.
        LoopCountThresholdExceededException: If the 50th mutation attempt
            is reached without enough effective mutations.
    """
    assert num_mutations > 0, \
        "Invalid argument(s) supplied to the function token_mutate_for_tsne_with_specific_errors"

    specific_mutate = mutator_obj.specific_mutate
    loop_count_threshold = 50

    this_corrupted = prog
    lines = set()
    mutation_count = 0
    loop_counter = 0

    while mutation_count < num_mutations:
        loop_counter += 1
        if loop_counter == loop_count_threshold:
            # Single pre-formatted argument: prints the same text on
            # Python 2 and Python 3.
            print("mutation_count %s" % (mutation_count,))
            raise LoopCountThresholdExceededException

        if include_kind:
            this_corrupted, _fix, line, _kind = specific_mutate(
                prog, this_corrupted, action, include_kind=True)
        else:
            this_corrupted, _fix, line = specific_mutate(
                prog, this_corrupted, action)

        # Only count the attempt if the line really differs from the original.
        if line is not None and fetch_line(prog, line) != fetch_line(this_corrupted, line):
            lines.add(line)
            mutation_count += 1

    assert len(lines) > 0, "Could not mutate!"

    # Discard corrupted programs that ended up with a blank line.
    if any(text.strip() == '' for text in get_lines(this_corrupted)):
        return []

    for line in sorted(lines):
        fix = fetch_line(prog, line)
        corrupt_line = fetch_line(this_corrupted, line)
        assert len(fetch_line(prog, line, include_line_number=False).strip()) != 0, "empty fix"
        assert len(fetch_line(this_corrupted, line, include_line_number=False).strip()) != 0, "empty corrupted line"
        if fix != corrupt_line:
            # Only the first differing line is reported.
            return [(this_corrupted, fix)]

    return []
def typo_mutate(mutator_obj, prog, max_num_mutations, num_mutated_progs, just_one=False):
    """Build (corrupted program, fix) pairs by mutating ``prog`` with ``mutator_obj``.

    Each of the ``num_mutated_progs`` rounds works as follows:

    1. A mutation count is chosen: 1 when ``max_num_mutations`` is 1,
       otherwise drawn from ``1..max_num_mutations`` with
       ``mutator_obj.rng.choice``.
    2. ``mutator_obj.easy_mutate`` is called until that many calls have
       left the reported line different from the same line of ``prog``.
    3. The round is dropped if the corrupted program contains a line that
       is empty after stripping.
    4. The mutated lines are visited in sorted order.  For each line that
       still differs, the current corrupted program is paired with the
       original line, the mutator's distribution is updated with the
       mutation names recorded for that line, and the line is repaired
       before moving on.

    Args:
        mutator_obj: Object providing ``rng.choice``, ``easy_mutate``,
            ``update_mutation_distribution`` and ``update_pmf``.
        prog: Original program; must have ``len(prog) > 10``.
        max_num_mutations: Inclusive upper bound on mutations per round
            (must be > 0).
        num_mutated_progs: Number of rounds (must be > 0).
        just_one: When true, each round stops after recording its first pair.

    Returns:
        A list of ``(corrupted_program, fix)`` tuples.  Pairs are
        de-duplicated through a set, so their order is unspecified.

    Raises:
        AssertionError: On invalid arguments, or if an examined line is
            empty in ``prog`` or in the corrupted program.
        LoopCountThresholdExceededException: If the 50th mutation attempt
            of a round is reached without enough effective mutations.
    """
    assert len(prog) > 10 and max_num_mutations > 0 and num_mutated_progs > 0, \
        "Invalid argument(s) supplied to the function typo_mutate"

    loop_count_threshold = 50
    corrupt_fix_pair = set()

    for _ in range(num_mutated_progs):
        # The RNG is only consulted when there is an actual choice to make.
        if max_num_mutations > 1:
            num_mutations = mutator_obj.rng.choice(range(max_num_mutations)) + 1
        else:
            num_mutations = 1

        this_corrupted = prog
        lines = set()
        mutations = {}  # line number -> names of every mutation applied to it
        mutation_count = 0
        loop_counter = 0

        while mutation_count < num_mutations:
            loop_counter += 1
            if loop_counter == loop_count_threshold:
                print("mutation_count", mutation_count)
                raise LoopCountThresholdExceededException

            # `line` is a line number here, not the line's text.
            this_corrupted, line, mutation_name = mutator_obj.easy_mutate(this_corrupted)

            if line is None:
                continue

            # Only count the attempt if the line really differs from the original.
            if fetch_line(prog, line) != fetch_line(this_corrupted, line):
                lines.add(line)
                mutation_count += 1
                mutations.setdefault(line, []).append(mutation_name)

        assert len(lines) > 0, "Could not mutate!"

        # Discard corrupted programs that ended up with a blank line.
        if any(text.strip() == '' for text in get_lines(this_corrupted)):
            continue

        for line in sorted(lines):
            fix = fetch_line(prog, line)
            corrupt_line = fetch_line(this_corrupted, line)
            assert len(fetch_line(prog, line, include_line_number=False).strip()) != 0, "empty fix"
            assert len(fetch_line(this_corrupted, line, include_line_number=False).strip()) != 0, "empty corrupted line"

            if fix == corrupt_line:
                continue

            corrupt_fix_pair.add((this_corrupted, fix))
            mutator_obj.update_mutation_distribution(mutations[line])
            if just_one:
                break

            # Repair this line so the next pair shows one mutation fewer.
            this_corrupted = do_fix_at_line(
                this_corrupted, line,
                fetch_line(prog, line, include_line_number=False))

    if corrupt_fix_pair:
        mutator_obj.update_pmf()

    return list(corrupt_fix_pair)