def test_intra_method_pairs(self):
    # Expected number of intra-method def-use pairs per traced test case.
    len_pairs = [
        9, 8, 13, 19, 3, 20, 8, 5, 18, 16, 12, 19, 3, 18, 18, 24, 9, 2, 8
    ]
    project_root = CLEAN_LINKED_LIST_ROOT
    trace_root = create_new_temp_dir()
    exclude_folders = ["venv"]
    cfg = ProjectCFG.create_from_path(project_root,
                                      exclude_folders=exclude_folders)
    thorough.run_tests(CLEAN_LINKED_LIST_ROOT, trace_root, exclude_folders)
    trace_reader = TraceReader(trace_root)
    cppvi = VarIndexFactory.new_cpp_index(project_root, trace_root)
    ll_py = str(CLEAN_LINKED_LIST_LL)

    def get_pairs(trace_file_path):
        np_array, _ = read_as_np_array(trace_file_path)
        idx_pairs = analyze_trace_w_index(trace_file_path, cppvi)

        def rename_vars(s):
            return {(el[0], el[1]) for el in s}

        idx_pairs = rename_vars(idx_pairs)
        return idx_pairs

    node_ids, paths = trace_reader.get_traces_for(ll_py)
    for node_id, path, expected_pairs_count in zip(node_ids, paths,
                                                   len_pairs):
        pairs = get_pairs(path)
        self.assertEqual(
            expected_pairs_count, len(pairs),
            "Pairs count doesn't match for test case: {}".format(node_id))
def test_branch_coverage(self):
    project_root = CLEAN_LINKED_LIST_ROOT
    # trace_root = create_new_temp_dir()
    trace_root = CLEAN_LINKED_LIST_ROOT
    exclude_folders = ["venv", "dataset"]
    cfg = ProjectCFG.create_from_path(project_root,
                                      exclude_folders=exclude_folders,
                                      use_cached_if_possible=False)
    thorough.run_tests(CLEAN_LINKED_LIST_ROOT, trace_root, exclude_folders)
    trace_reader = TraceReader(trace_root)
    ll_py = str(CLEAN_LINKED_LIST_LL)
    ll_py_cfg = cfg.module_cfgs[ll_py]
    total_exercised = set()
    available_branches = ll_py_cfg.branches
    for node_id, path in zip(*trace_reader.get_traces_for(ll_py)):
        df, size = read_as_dataframe(path)
        covered = find_covered_branches(df, ll_py_cfg.branches)
        total_exercised.update(covered)
    print("Coverage")
    print_percent("Branches covered", total_exercised, ll_py_cfg.branches)
    print(available_branches)
    print(total_exercised)
    not_exercised_branches = set(available_branches) - set(total_exercised)
    print(
        "Not exercised branches total ({}): ".format(
            len(not_exercised_branches)), not_exercised_branches)
    self.assertEqual(13, len(not_exercised_branches))
def run_mutation_experiment_fixed_coverage(project_root,
                                           module_under_test_path,
                                           coverage_boundaries_count=20,
                                           max_trace_size=10,
                                           timeout=None,
                                           coverage_metrics=None,
                                           support=100):
    logger.debug("Running mutation experiment (fixed coverage) for {module}",
                 module=module_under_test_path)
    trace_reader = TraceReader(project_root)
    not_failing_node_ids = trace_reader.get_not_failing_node_ids(
        module_under_test_path)
    scoring_function, total_mutants_count = get_mutation_scoring_function(
        project_root,
        module_under_test_path,
        not_failing_node_ids,
        timeout=timeout)
    points = generic_experiment_coverage(
        project_root,
        module_under_test_path,
        [scoring_function],
        coverage_boundaries_count=coverage_boundaries_count,
        max_trace_size=max_trace_size,
        coverage_metrics=coverage_metrics,
        support=support)
    df = pd.DataFrame(
        data=points,
        columns=[SUITE_SIZE, METRIC, MUTATION_SCORE, SUITE_COVERAGE])
    df = bin_zero_to_one_column_to_percent(df, SUITE_COVERAGE,
                                           SUITE_COVERAGE_BIN,
                                           coverage_boundaries_count)
    return df, total_mutants_count
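# A minimal usage sketch for run_mutation_experiment_fixed_coverage. The
# paths below are hypothetical, and traces are assumed to have been
# collected already (e.g. via thorough.run_tests); the signature and the
# returned (DataFrame, mutant count) pair are as defined above.
# run_mutation_experiment_fixed_size is called analogously.
if __name__ == "__main__":
    from pathlib import Path

    project_root = Path("dataset/linked_list_clean")  # hypothetical path
    module = str(project_root / "core" / "ll.py")     # hypothetical module
    df, total_mutants = run_mutation_experiment_fixed_coverage(
        project_root,
        module,
        coverage_boundaries_count=20,
        coverage_metrics=[CoverageMetric.BRANCH])
    print("Total mutants:", total_mutants)
    # Average mutation score per coverage bin.
    print(df.groupby(SUITE_COVERAGE_BIN)[MUTATION_SCORE].mean())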
def trace_this(function,
               project_root=PROJECT_ROOT,
               trace_root=TEMP_DIRECTORY,
               args=None,
               kwargs=None):
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    target_source_file = inspect.getsourcefile(function)
    keep = [str(project_root)]
    exclude = [str(project_root / "venv")]
    trace_folder = trace_root
    tracer = Tracer(keep, exclude, trace_folder_parent=str(trace_folder))
    trace_name = function.__name__
    tracer.start(trace_name)
    function(*args, **kwargs)
    tracer.stop()
    tracer.full_stop()
    trace_reader = TraceReader(trace_root)
    return trace_reader.trace_path(trace_name, target_source_file)
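# A minimal usage sketch for trace_this, relying on the PROJECT_ROOT and
# TEMP_DIRECTORY defaults above. The traced function is purely
# illustrative; trace_this returns the path of the recorded trace file.
def _sum_of_squares(n):
    total = 0
    for i in range(n):
        total += i * i
    return total


trace_file = trace_this(_sum_of_squares, args=[100])
print("Trace written to:", trace_file)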
def run_mutation_experiment_fixed_size(project_root,
                                       module_under_test_path,
                                       test_suite_sizes_count=30,
                                       test_suite_sizes=None,
                                       max_trace_size=10,
                                       timeout=None,
                                       coverage_metrics=None,
                                       support=100):
    logger.debug("Running mutation experiment (fixed size) for {module}",
                 module=module_under_test_path)
    trace_reader = TraceReader(project_root)
    not_failing_node_ids = trace_reader.get_not_failing_node_ids(
        module_under_test_path)
    mutation_scoring_function, total_mutants_count = get_mutation_scoring_function(
        project_root,
        module_under_test_path,
        not_failing_node_ids=not_failing_node_ids,
        timeout=timeout)
    points = generic_experiment_size(
        project_root,
        module_under_test_path,
        [mutation_scoring_function],
        test_suite_sizes_count=test_suite_sizes_count,
        test_suite_sizes=test_suite_sizes,
        max_trace_size=max_trace_size,
        coverage_metrics=coverage_metrics,
        node_ids=tuple(not_failing_node_ids),
        support=support)
    df = pd.DataFrame(
        data=points,
        columns=[SUITE_SIZE, METRIC, MUTATION_SCORE, SUITE_COVERAGE])
    return df, total_mutants_count
def run_real_bugs_experiment_fixed_size(project_root,
                                        module_under_test_path,
                                        revealing_node_ids,
                                        test_suite_sizes_count=30,
                                        test_suite_sizes=None,
                                        max_trace_size=10,
                                        coverage_metrics=None,
                                        support=100):
    logger.debug("Running real project experiment for {module} (fixed size)",
                 module=module_under_test_path)
    revealing_node_ids = set(revealing_node_ids)
    total_revealing_node_ids = len(revealing_node_ids)
    trace_reader = TraceReader(project_root)
    not_failing_node_ids = trace_reader.get_not_failing_node_ids(
        module_under_test_path)
    scoring_function = get_bugs_scoring_function(revealing_node_ids)
    points = generic_experiment_size(
        project_root,
        module_under_test_path,
        [scoring_function],
        test_suite_sizes_count=test_suite_sizes_count,
        test_suite_sizes=test_suite_sizes,
        max_trace_size=max_trace_size,
        coverage_metrics=coverage_metrics,
        node_ids=set(not_failing_node_ids) | set(revealing_node_ids),
        support=support)
    df = pd.DataFrame(
        data=points,
        columns=[SUITE_SIZE, METRIC, BUG_REVEALED_SCORE, SUITE_COVERAGE])
    return df, total_revealing_node_ids
def test_failed_tests_recorded(self):
    project_root = BUGGY_LINKED_LIST_ROOT
    exclude_folders = ["venv"]
    thorough_location = str(PROJECT_ROOT / "thorough.py")
    subprocess.run(
        f"python3 {thorough_location} -t --trace_dir {str(project_root)}",
        cwd=project_root,
        shell=True)
    trace_root = project_root
    trace_reader = TraceReader(trace_root)
    failed_cases = trace_reader.read_failed_test_cases()
    self.assertEqual(1, len(failed_cases))
    expected_failed_test_case = (
        "tests/test_linked_list_module.py::LinkedListTest"
        "::test_append_on_removed")
    self.assertEqual(expected_failed_test_case, failed_cases[0])
def test_inter_method_pairs_test_interclass_pairs(self):
    # Expected inter-method / inter-class def-use pair counts per test case.
    len_im_pairs = [
        0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
    ]
    len_ic_pairs = [
        5, 2, 5, 4, 1, 4, 2, 1, 5, 5, 7, 4, 1, 10, 7, 5, 9, 1, 4
    ]
    project_root = CLEAN_LINKED_LIST_ROOT
    trace_root = create_new_temp_dir()
    exclude_folders = ["venv"]
    cfg = ProjectCFG.create_from_path(project_root,
                                      exclude_folders=exclude_folders)
    thorough.run_tests(CLEAN_LINKED_LIST_ROOT, trace_root, exclude_folders)
    trace_reader = TraceReader(trace_root)
    vi = VarIndexFactory.new_py_index(project_root, trace_root)
    ll_py = str(CLEAN_LINKED_LIST_LL)

    def get_pairs(trace_file_path):
        np_array, _ = read_as_np_array(trace_file_path)
        scopes = read_scopes_for_trace_file(trace_file_path)
        im_pairs, ic_pairs = analyze(trace_file_path, vi, scopes)

        def rename_vars(s):
            return {(el[0], el[1]) for el in s}

        im_pairs = rename_vars(im_pairs)
        ic_pairs = rename_vars(ic_pairs)
        return im_pairs, ic_pairs

    node_ids, paths = trace_reader.get_traces_for(ll_py)
    for node_id, path, expected_im_len, expected_ic_len in zip(
            node_ids, paths, len_im_pairs, len_ic_pairs):
        im_pairs, ic_pairs = get_pairs(path)
        self.assertEqual(
            expected_im_len, len(im_pairs),
            "Intermethod pairs count doesn't match for test case: {}".format(
                node_id))
        self.assertEqual(
            expected_ic_len, len(ic_pairs),
            "Interclass pairs count doesn't match for test case: {}".format(
                node_id))
def combined_experiment_fixed_coverage(project_root,
                                       module_under_test_path,
                                       revealing_node_ids,
                                       coverage_boundaries_count=20,
                                       max_trace_size=10,
                                       coverage_metrics=None,
                                       support=100,
                                       timeout=None):
    logger.debug(
        "Running real project experiment for {module} (fixed coverage)",
        module=module_under_test_path)
    trace_reader = TraceReader(project_root)
    not_failing_node_ids = trace_reader.get_not_failing_node_ids(
        module_under_test_path)
    mutation_scoring_function, total_mutants_count = get_mutation_scoring_function(
        project_root,
        module_under_test_path,
        not_failing_node_ids=not_failing_node_ids,
        timeout=timeout)
    bugs_scoring_function = get_bugs_scoring_function(revealing_node_ids)
    total_bugs_count = len(revealing_node_ids)
    points = generic_experiment_coverage(
        project_root,
        module_under_test_path,
        [mutation_scoring_function, bugs_scoring_function],
        coverage_boundaries_count=coverage_boundaries_count,
        max_trace_size=max_trace_size,
        coverage_metrics=coverage_metrics,
        node_ids=set(revealing_node_ids) | set(not_failing_node_ids),
        support=support)
    columns = [
        SUITE_SIZE, METRIC, MUTATION_SCORE, BUG_REVEALED_SCORE,
        SUITE_COVERAGE
    ]
    df = pd.DataFrame(data=points, columns=columns)
    df = bin_zero_to_one_column_to_percent(df, SUITE_COVERAGE,
                                           SUITE_COVERAGE_BIN,
                                           coverage_boundaries_count)
    return df, total_mutants_count, total_bugs_count
def test_mutate_linked_list_module(self):
    project_root = Path(
        __file__).parent.parent.parent / "dataset/linked_list_clean"
    module_under_test_path = project_root / "core" / "ll.py"
    trace_reader = TraceReader(project_root)
    not_failing_node_ids = trace_reader.get_not_failing_node_ids(
        module_under_test_path)
    killed, total = killed_mutants(
        project_root=str(project_root),
        module_under_test_path=str(module_under_test_path),
        not_failing_node_ids=not_failing_node_ids,
        timeout=None)
    s = set()
    for m in killed.values():
        s.update(m)
    self.assertEqual(46, total)
    self.assertEqual(30, len(s))
def generic_experiment_size(project_root,
                            module_under_test_path,
                            scoring_functions,
                            test_suite_sizes_count=30,
                            test_suite_sizes=None,
                            max_trace_size=10,
                            coverage_metrics=None,
                            node_ids=None,
                            support=100):
    logger.debug("Running mutation experiment for {module}",
                 module=module_under_test_path)
    if coverage_metrics is None:
        coverage_metrics = DEFAULT_METRICS
    cfg = ProjectCFG.create_from_path(project_root,
                                      exclude_folders=COMMON_EXCLUDE)
    if node_ids is None:
        trace_reader = TraceReader(project_root)
        node_ids, paths = trace_reader.get_traces_for(module_under_test_path)
    total_cases = len(node_ids)
    if test_suite_sizes is None:
        # Spread the suite sizes evenly from a fraction of the traced
        # cases up to the full suite.
        if total_cases < test_suite_sizes_count:
            test_suite_sizes_count = total_cases
        test_suite_sizes = np.arange(test_suite_sizes_count) + 1
        test_suite_sizes = [
            int(c * total_cases / test_suite_sizes_count)
            for c in test_suite_sizes
        ]
    logger.debug("Testing test suites of sizes {ss}", ss=test_suite_sizes)
    generator = SuiteGenerator(project_root,
                               project_root,
                               COMMON_EXCLUDE,
                               max_trace_size=max_trace_size)
    points = []
    # Give up on a metric after this many consecutive sizes for which no
    # suite could be generated.
    no_suites_created_count_max = 3
    for metric in coverage_metrics:
        no_suites_created_count = 0
        for sub_test_suites_size in test_suite_sizes:
            logger.debug(
                "Test suite size: {sub_test_suites_size}, metric: {metric}",
                sub_test_suites_size=sub_test_suites_size,
                metric=metric)
            suites = generator.fix_sized_suites(
                module_under_test_path=module_under_test_path,
                coverage_metric=metric,
                exact_size=sub_test_suites_size,
                n=support,
                check_unique_items_covered=False,
                test_cases=node_ids)
            if not suites:
                no_suites_created_count += 1
            else:
                no_suites_created_count = 0
            if no_suites_created_count > no_suites_created_count_max:
                break
            for suite in suites:
                scores = [
                    scoring_function(suite)
                    for scoring_function in scoring_functions
                ]
                point = (len(suite.test_cases), str(metric), *scores,
                         suite.coverage)
                points.append(point)
    return points
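# A standalone worked example (not part of the experiment code) of how
# generic_experiment_size spaces its suite sizes when test_suite_sizes is
# not given: with 19 traced test cases and 5 requested sizes, the suites
# grow linearly up to the full suite.
total_cases = 19
test_suite_sizes_count = 5
sizes = [
    int(c * total_cases / test_suite_sizes_count)
    for c in range(1, test_suite_sizes_count + 1)
]
print(sizes)  # [3, 7, 11, 15, 19] -- the last size equals total_cases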
class DefUsePairsCoverage(Coverage):
    metrics = {
        CoverageMetric.ALL_PAIRS,
        CoverageMetric.M_ONLY,
        CoverageMetric.IC_ONLY,
        CoverageMetric.IM_ONLY,
        CoverageMetric.M_AND_IM,
        CoverageMetric.M_AND_IC,
        CoverageMetric.IM_AND_IC,
    }
    file_name_col = "File name"
    file_path_col = "File path"
    coverage_col = "Coverage"

    def __init__(self, trace_root, project_root, exclude_folders=None,
                 max_trace_size=None):
        self.project_cfg = ProjectCFG.create_from_path(
            project_root,
            exclude_folders=exclude_folders,
            use_cached_if_possible=True)
        self.max_trace_size = max_trace_size
        self.trace_root = trace_root
        self.trace_reader = TraceReader(trace_root)
        self.vi = VarIndexFactory.new_py_index(project_root, trace_root)
        self.cppvi = VarIndexFactory.new_cpp_index(project_root, trace_root)

    def report(self, of_type=CoverageMetric.ALL_PAIRS):
        report = {}
        for module_path in self.trace_reader.files_mapping.keys():
            covered_pairs_per_test_case = self.covered_items_of(
                module_path, of_type=of_type)
            covered_items_of_module = set()
            for s in covered_pairs_per_test_case.values():
                covered_items_of_module.update(s)
            total_pairs = self.total_items_of(module_path, of_type=of_type)
            report[module_path] = percent(covered_items_of_module,
                                          total_pairs)
        report = pd.DataFrame.from_dict({self.coverage_col: report},
                                        orient="columns")
        return report

    def group_items_by_key(self, items, key):
        d = defaultdict(list)
        if isinstance(key, str):
            attr = key
            key = lambda x: getattr(x, attr)
        for item in items:
            k = key(item)
            d[k].append(item)
        return d

    def total_items_of(self, module_path,
                       of_type=CoverageMetric.ALL_PAIRS) -> Set:
        module = self.project_cfg.module_cfgs.get(module_path)
        if not module:
            return set()
        if of_type == CoverageMetric.M_ONLY:
            return module.intramethod_pairs
        elif of_type == CoverageMetric.IM_ONLY:
            return module.intermethod_pairs
        elif of_type == CoverageMetric.IC_ONLY:
            return module.interclass_pairs
        elif of_type == CoverageMetric.M_AND_IM:
            return module.intramethod_pairs | module.intermethod_pairs
        elif of_type == CoverageMetric.M_AND_IC:
            return module.intramethod_pairs | module.interclass_pairs
        elif of_type == CoverageMetric.IM_AND_IC:
            return module.intermethod_pairs | module.interclass_pairs
        elif of_type == CoverageMetric.ALL_PAIRS:
            return (module.intramethod_pairs | module.intermethod_pairs
                    | module.interclass_pairs)
        else:
            raise ValueError(
                "Unknown coverage metric {} in parameter 'of_type'".format(
                    of_type))

    def covered_items_of(self,
                         module_path,
                         of_type=CoverageMetric.ALL_PAIRS,
                         selected_node_ids=None) -> dict:
        data = {}
        total_items = self.total_items_of(module_path, of_type=of_type)
        node_ids, traces_paths = self.trace_reader.get_traces_for(
            module_path, selected_node_ids=selected_node_ids)
        module_name = os.path.basename(module_path)
        desc = "Finding def-use pairs of type {} for module {}".format(
            of_type.name, module_name)
        for test_case, trace_file_path in tqdm(zip(node_ids, traces_paths),
                                               total=len(node_ids),
                                               desc=desc,
                                               unit="test_case"):
            np_array, fsize = read_as_np_array(
                trace_file_path, max_size_mb=self.max_trace_size)
            if np_array is None:
                continue
            scopes = read_scopes_for_trace_file(trace_file_path)
            if not scopes:
                continue
            if of_type == CoverageMetric.M_ONLY:
                mp = analyze_trace_w_index(trace_file_path, self.cppvi)
                data[test_case] = rename_vars(mp)
            elif of_type == CoverageMetric.IM_ONLY:
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(imp)
            elif of_type == CoverageMetric.IC_ONLY:
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(icp)
            elif of_type == CoverageMetric.M_AND_IM:
                mp = analyze_trace_w_index(trace_file_path, self.cppvi)
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(mp) | rename_vars(imp)
            elif of_type == CoverageMetric.M_AND_IC:
                mp = analyze_trace_w_index(trace_file_path, self.cppvi)
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(mp) | rename_vars(icp)
            elif of_type == CoverageMetric.IM_AND_IC:
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(imp) | rename_vars(icp)
            elif of_type == CoverageMetric.ALL_PAIRS:
                mp = analyze_trace_w_index(trace_file_path, self.cppvi)
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = (rename_vars(mp) | rename_vars(imp)
                                   | rename_vars(icp))
            else:
                raise ValueError(
                    "Unknown coverage metric {} in parameter 'of_type'".
                    format(of_type))
            data[test_case] = data[test_case].intersection(total_items)
        return data
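# A minimal usage sketch for DefUsePairsCoverage (the paths are
# hypothetical, and traces are assumed to have been collected under
# trace_root already). report() yields a DataFrame with one coverage
# percentage per traced module.
if __name__ == "__main__":
    from pathlib import Path

    project_root = Path("dataset/linked_list_clean")  # hypothetical
    trace_root = project_root  # assumes traces are stored in-project
    coverage = DefUsePairsCoverage(trace_root,
                                   project_root,
                                   exclude_folders=["venv"],
                                   max_trace_size=10)
    print(coverage.report(of_type=CoverageMetric.ALL_PAIRS))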
if len(failing_on_assertion_node_ids) > 0:
    repo.status = STATUS_NO_MODULE_CFG
    repo.save()
    repo_stat.mark_repo_as_good(manager)
    logger.warning("Found node ids which failed on assertion!")

graphs_path = graphs_path_parent
module_under_test_path = manager.test_module_path
logger.info("Running the experiment for module {p}", p=module_under_test_path)
st = time()
buggy_project.run_command(
    f"python3 {thorough_path} -t --trace_dir={buggy_project.path}",
    extra_requirements=extra_requirements)
elapsed_tracing = int(time() - st)
trace_reader = TraceReader(buggy_project.path)
module_under_test_absolute = os.path.join(buggy_project.path,
                                          module_under_test_path)
covering_node_ids, paths = trace_reader.get_traces_for(
    module_under_test_absolute)
covering_node_ids = set(covering_node_ids)
# Drop node ids that failed for reasons other than assertions.
bad_node_ids = after - failing_on_assertion_node_ids
covering_node_ids -= bad_node_ids
not_failing_node_ids = covering_node_ids - failing_on_assertion_node_ids
not_failing_node_ids_as_params = " ".join(not_failing_node_ids)
return_code = buggy_project.run_command(
    f"pytest {not_failing_node_ids_as_params}")
if return_code == 0:
    logger.warning("Test cases can be run individually")
out_folder = maybe_expand(args.out)
module = args.module
timeout = args.timeout
revealing_node_ids = args.revealing_node_ids
exclude_folders_tracing = COMMON_EXCLUDE
thorough.run_tests(project_root,
                   trace_root,
                   exclude_folders_tracing=exclude_folders_tracing,
                   exclude_folders_collection=None,
                   show_time_per_test=False,
                   quiet=False,
                   deselect_tests=None)
trace_reader = TraceReader(trace_root)
failed_node_ids = trace_reader.read_failed_test_cases()
module = maybe_expand(module)
coverage_metrics = (CoverageMetric.STATEMENT, CoverageMetric.BRANCH,
                    CoverageMetric.ALL_PAIRS)
# coverage_metrics = (CoverageMetric.ALL_PAIRS, CoverageMetric.ALL_C_USES,
#                     CoverageMetric.ALL_P_USES)
new_module_path = module_path(out_folder, project_root, module)
shutil.copy(module, new_module_path)

# fixed size
df, total_mutants_count, total_bugs_count = combined_experiment_fixed_size(
    project_root=project_root,
    module_under_test_path=module,
    revealing_node_ids=revealing_node_ids,
def create(project_root, traces_root):
    trace_reader = TraceReader(traces_root)
    return VarIndex(read_du_index(project_root),
                    trace_reader.files_mapping.index)
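# A minimal usage sketch for create (the paths are hypothetical, and it is
# assumed that create is callable in this scope; elsewhere in this section
# equivalent indexes are obtained via VarIndexFactory.new_py_index /
# new_cpp_index).
from pathlib import Path

var_index = create(Path("dataset/linked_list_clean"),
                   Path("dataset/linked_list_clean"))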
max_trace_size = args.max_trace_size
graphs_folder = maybe_expand(args.graphs_folder)
exclude_folders_tracing = ["dataset", "venv", "tests", "test"]
thorough.run_tests(
    project_root,
    trace_root,
    exclude_folders_tracing=exclude_folders_tracing,
    exclude_folders_collection=None,
    show_time_per_test=False,
    quiet=False,
    deselect_tests=None,
    # node_ids=args.node_ids
)
trace_reader = TraceReader(trace_root)
failed_test_cases = trace_reader.read_failed_test_cases()
print("Failed cases:", failed_test_cases)
if len(failed_test_cases) == 0:
    raise ValueError("No test cases failed")


def experiment(module):
    # Pass max_trace_size by keyword: positionally it would bind to the
    # test_suite_sizes parameter of run_real_bugs_experiment_fixed_size.
    df_fixed_size, total_bugs = run_real_bugs_experiment_fixed_size(
        project_root,
        module,
        node_ids,
        test_suite_sizes_count=test_suite_sizes_count,
        max_trace_size=max_trace_size)
    plot_type = "fixed_size_box_plot"
    title = f"{Path(module).name}, {total_bugs} bugs"
    image_p = image_path(graphs_folder, project_root, module, plot_type)