def test_intra_method_pairs(self):
        len_pairs = [
            9, 8, 13, 19, 3, 20, 8, 5, 18, 16, 12, 19, 3, 18, 18, 24, 9, 2, 8
        ]

        project_root = CLEAN_LINKED_LIST_ROOT
        trace_root = create_new_temp_dir()
        exclude_folders = ["venv"]
        cfg = ProjectCFG.create_from_path(project_root,
                                          exclude_folders=exclude_folders)

        thorough.run_tests(CLEAN_LINKED_LIST_ROOT, trace_root, exclude_folders)
        trace_reader = TraceReader(trace_root)

        cppvi = VarIndexFactory.new_cpp_index(project_root, trace_root)

        ll_py = str(CLEAN_LINKED_LIST_LL)

        def get_pairs(trace_file_path):
            np_array, _ = read_as_np_array(trace_file_path)
            idx_pairs = analyze_trace_w_index(trace_file_path, cppvi)

            def rename_vars(s):
                return {(el[0], el[1]) for el in s}

            idx_pairs = rename_vars(idx_pairs)
            return idx_pairs

        node_ids, paths = trace_reader.get_traces_for(ll_py)
        for node_id, path, expected_pairs_count in zip(node_ids, paths,
                                                       len_pairs):
            pairs = get_pairs(path)
            self.assertEqual(
                expected_pairs_count, len(pairs),
                "Pairs count don't match for test case: {}".format(node_id))

    def test_branch_coverage(self):
        project_root = CLEAN_LINKED_LIST_ROOT
        # trace_root = create_new_temp_dir()
        trace_root = CLEAN_LINKED_LIST_ROOT

        exclude_folders = ["venv", "dataset"]
        cfg = ProjectCFG.create_from_path(project_root,
                                          exclude_folders=exclude_folders,
                                          use_cached_if_possible=False)

        thorough.run_tests(CLEAN_LINKED_LIST_ROOT, trace_root, exclude_folders)

        trace_reader = TraceReader(trace_root)

        ll_py = str(CLEAN_LINKED_LIST_LL)
        ll_py_cfg = cfg.module_cfgs[ll_py]

        total_exercised = set()
        available_branches = ll_py_cfg.branches

        for node_id, path in zip(*trace_reader.get_traces_for(ll_py)):
            df, size = read_as_dataframe(path)
            covered = find_covered_branches(df, ll_py_cfg.branches)
            total_exercised.update(covered)

        print("Coverage")
        print_percent("Branches covered", total_exercised, ll_py_cfg.branches)
        print(available_branches)
        print(total_exercised)
        not_exercised_branches = set(available_branches) - set(total_exercised)
        print("Not exercised branches total ({}): ".format(len(not_exercised_branches)), not_exercised_branches)
        self.assertEqual(13, len(not_exercised_branches))
def run_mutation_experiment_fixed_coverage(project_root,
                                           module_under_test_path,
                                           coverage_boundaries_count=20,
                                           max_trace_size=10,
                                           timeout=None,
                                           coverage_metrics=None,
                                           support=100):
    logger.debug("Running mutation experiment (fixed coverage) for {module}",
                 module=module_under_test_path)
    trace_reader = TraceReader(project_root)
    not_failing_node_ids = trace_reader.get_not_failing_node_ids(
        module_under_test_path)
    scoring_function, total_mutants_count = get_mutation_scoring_function(
        project_root,
        module_under_test_path,
        not_failing_node_ids,
        timeout=timeout)

    points = generic_experiment_coverage(
        project_root,
        module_under_test_path, [scoring_function],
        coverage_boundaries_count=coverage_boundaries_count,
        max_trace_size=max_trace_size,
        coverage_metrics=coverage_metrics,
        support=support)

    df = pd.DataFrame(
        data=points,
        columns=[SUITE_SIZE, METRIC, MUTATION_SCORE, SUITE_COVERAGE])
    df = bin_zero_to_one_column_to_percent(df, SUITE_COVERAGE,
                                           SUITE_COVERAGE_BIN,
                                           coverage_boundaries_count)
    return df, total_mutants_count
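A minimal driver sketch (not part of the original code) for run_mutation_experiment_fixed_coverage; the fixture constants and the metric tuple are assumptions carried over from the tests above, and it presumes traces were already recorded under the project root.

# Hypothetical driver: fixture paths and metrics are assumptions, traces assumed recorded.
df, total_mutants = run_mutation_experiment_fixed_coverage(
    project_root=CLEAN_LINKED_LIST_ROOT,
    module_under_test_path=str(CLEAN_LINKED_LIST_LL),
    coverage_boundaries_count=20,
    max_trace_size=10,
    timeout=60,
    coverage_metrics=(CoverageMetric.STATEMENT, CoverageMetric.BRANCH),
    support=100)
print(df.groupby([METRIC, SUITE_COVERAGE_BIN])[MUTATION_SCORE].mean())
print("Total mutants:", total_mutants)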
def trace_this(function,
               project_root=PROJECT_ROOT,
               trace_root=TEMP_DIRECTORY,
               args=None,
               kwargs=None):
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    target_source_file = inspect.getsourcefile(function)

    keep = [str(project_root)]
    exclude = [str(project_root / "venv")]
    trace_folder = trace_root
    tracer = Tracer(keep, exclude, trace_folder_parent=str(trace_folder))

    trace_name = function.__name__
    tracer.start(trace_name)
    function(*args, **kwargs)
    tracer.stop()
    tracer.full_stop()

    trace_reader = TraceReader(trace_root)

    return trace_reader.trace_path(trace_name, target_source_file)
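A hedged usage sketch for trace_this: the traced toy function and the core.ll import are illustrative assumptions; the project_root and trace_root defaults come from the signature above.

# Illustrative only: the traced function and the core.ll import are assumptions.
def exercise_linked_list():
    from core.ll import LinkedList  # assumed module under test
    ll = LinkedList()
    ll.append(1)
    ll.append(2)

trace_file = trace_this(exercise_linked_list)  # uses PROJECT_ROOT / TEMP_DIRECTORY defaults
np_array, size = read_as_np_array(trace_file)  # read the recorded trace back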
def run_mutation_experiment_fixed_size(project_root,
                                       module_under_test_path,
                                       test_suite_sizes_count=30,
                                       test_suite_sizes=None,
                                       max_trace_size=10,
                                       timeout=None,
                                       coverage_metrics=None,
                                       support=100):
    logger.debug("Running mutation experiment (fixed size) for {module}",
                 module=module_under_test_path)
    trace_reader = TraceReader(project_root)
    not_failing_node_ids = trace_reader.get_not_failing_node_ids(
        module_under_test_path)

    mutation_scoring_function, total_mutants_count = get_mutation_scoring_function(
        project_root,
        module_under_test_path,
        not_failing_node_ids=not_failing_node_ids,
        timeout=timeout)

    points = generic_experiment_size(
        project_root,
        module_under_test_path, [mutation_scoring_function],
        test_suite_sizes_count=test_suite_sizes_count,
        test_suite_sizes=test_suite_sizes,
        max_trace_size=max_trace_size,
        coverage_metrics=coverage_metrics,
        node_ids=tuple(not_failing_node_ids),
        support=support)

    df = pd.DataFrame(
        data=points,
        columns=[SUITE_SIZE, METRIC, MUTATION_SCORE, SUITE_COVERAGE])

    return df, total_mutants_count
def run_real_bugs_experiment_fixed_size(project_root,
                                        module_under_test_path,
                                        revealing_node_ids,
                                        test_suite_sizes_count=30,
                                        test_suite_sizes=None,
                                        max_trace_size=10,
                                        coverage_metrics=None,
                                        support=100):
    logger.debug("Running real project experiment for {module} (fixed size)",
                 module=module_under_test_path)

    revealing_node_ids = set(revealing_node_ids)
    total_revealing_node_ids = len(revealing_node_ids)

    trace_reader = TraceReader(project_root)
    not_failing_node_ids = trace_reader.get_not_failing_node_ids(
        module_under_test_path)

    scoring_function = get_bugs_scoring_function(revealing_node_ids)
    points = generic_experiment_size(
        project_root,
        module_under_test_path, [scoring_function],
        test_suite_sizes_count=test_suite_sizes_count,
        test_suite_sizes=test_suite_sizes,
        max_trace_size=max_trace_size,
        coverage_metrics=coverage_metrics,
        node_ids=set(not_failing_node_ids) | set(revealing_node_ids),
        support=support)

    df = pd.DataFrame(
        data=points,
        columns=[SUITE_SIZE, METRIC, BUG_REVEALED_SCORE, SUITE_COVERAGE])

    return df, total_revealing_node_ids
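A short hedged sketch of feeding run_real_bugs_experiment_fixed_size: taking the revealing node ids from read_failed_test_cases mirrors its use elsewhere in this file, but the driver itself is an assumption.

# Hypothetical driver: recorded failing test cases serve as the bug-revealing ids.
trace_reader = TraceReader(project_root)
revealing_node_ids = trace_reader.read_failed_test_cases()

df, total_bugs = run_real_bugs_experiment_fixed_size(
    project_root,
    module_under_test_path,
    revealing_node_ids,
    test_suite_sizes_count=30,
    max_trace_size=10,
    coverage_metrics=(CoverageMetric.BRANCH, CoverageMetric.ALL_PAIRS),
    support=100)
print(df.groupby([METRIC, SUITE_SIZE])[BUG_REVEALED_SCORE].mean())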
Example #7
    def test_failed_tests_recorded(self):
        project_root = BUGGY_LINKED_LIST_ROOT

        exclude_folders = ["venv"]
        thorough_location = str(PROJECT_ROOT / "thorough.py")
        subprocess.run(f"python3 {thorough_location} -t --trace_dir {str(project_root)}", cwd=project_root, shell=True)
        trace_root = project_root
        trace_reader = TraceReader(trace_root)
        failed_cases = trace_reader.read_failed_test_cases()
        self.assertEqual(1, len(failed_cases))
        expected_failed_test_case = "tests/test_linked_list_module.py::LinkedListTest::test_append_on_removed"
        self.assertEqual(expected_failed_test_case, failed_cases[0])
    def test_inter_method_pairs_test_interclass_pairs(self):
        len_im_pairs = [
            0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
        ]
        len_ic_pairs = [
            5, 2, 5, 4, 1, 4, 2, 1, 5, 5, 7, 4, 1, 10, 7, 5, 9, 1, 4
        ]

        project_root = CLEAN_LINKED_LIST_ROOT
        trace_root = create_new_temp_dir()
        exclude_folders = ["venv"]
        cfg = ProjectCFG.create_from_path(project_root,
                                          exclude_folders=exclude_folders)

        thorough.run_tests(CLEAN_LINKED_LIST_ROOT, trace_root, exclude_folders)
        trace_reader = TraceReader(trace_root)

        vi = VarIndexFactory.new_py_index(project_root, trace_root)

        ll_py = str(CLEAN_LINKED_LIST_LL)

        def get_pairs(trace_file_path):
            np_array, _ = read_as_np_array(trace_file_path)
            scopes = read_scopes_for_trace_file(trace_file_path)
            im_pairs, ic_pairs = analyze(trace_file_path, vi, scopes)

            def rename_vars(s):
                return {(el[0], el[1]) for el in s}

            im_pairs = rename_vars(im_pairs)
            ic_pairs = rename_vars(ic_pairs)
            return im_pairs, ic_pairs

        node_ids, paths = trace_reader.get_traces_for(ll_py)
        for node_id, path, expected_im_len, expected_ic_len in zip(
                node_ids, paths, len_im_pairs, len_ic_pairs):
            im_pairs, ic_pairs = get_pairs(path)
            self.assertEqual(
                expected_im_len, len(im_pairs),
                "Intermethod pairs count don't match for test case: {}".format(
                    node_id))
            self.assertEqual(
                expected_ic_len, len(ic_pairs),
                "Intermethod pairs count don't match for test case: {}".format(
                    node_id))
def combined_experiment_fixed_coverage(project_root,
                                       module_under_test_path,
                                       revealing_node_ids,
                                       coverage_boundaries_count=20,
                                       max_trace_size=10,
                                       coverage_metrics=None,
                                       support=100,
                                       timeout=None):
    logger.debug(
        "Running real project experiment for {module} (fixed coverage)",
        module=module_under_test_path)

    trace_reader = TraceReader(project_root)
    not_failing_node_ids = trace_reader.get_not_failing_node_ids(
        module_under_test_path)

    mutation_scoring_function, total_mutants_count = get_mutation_scoring_function(
        project_root,
        module_under_test_path,
        not_failing_node_ids=not_failing_node_ids,
        timeout=timeout)

    bugs_scoring_function = get_bugs_scoring_function(revealing_node_ids)
    total_bugs_count = len(revealing_node_ids)

    points = generic_experiment_coverage(
        project_root,
        module_under_test_path,
        [mutation_scoring_function, bugs_scoring_function],
        coverage_boundaries_count=coverage_boundaries_count,
        max_trace_size=max_trace_size,
        coverage_metrics=coverage_metrics,
        node_ids=set(revealing_node_ids) | set(not_failing_node_ids),
        support=support)
    columns = [
        SUITE_SIZE, METRIC, MUTATION_SCORE, BUG_REVEALED_SCORE, SUITE_COVERAGE
    ]
    df = pd.DataFrame(data=points, columns=columns)

    df = bin_zero_to_one_column_to_percent(df, SUITE_COVERAGE,
                                           SUITE_COVERAGE_BIN,
                                           coverage_boundaries_count)

    return df, total_mutants_count, total_bugs_count
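A brief, hedged sketch of consuming the combined experiment's output; the aggregation is illustrative and assumes the bin column added by bin_zero_to_one_column_to_percent is named SUITE_COVERAGE_BIN.

# Illustrative post-processing of the combined experiment output (assumed driver code).
df, total_mutants, total_bugs = combined_experiment_fixed_coverage(
    project_root,
    module_under_test_path,
    revealing_node_ids,
    coverage_boundaries_count=20,
    support=100)
summary = (df.groupby([METRIC, SUITE_COVERAGE_BIN])
             [[MUTATION_SCORE, BUG_REVEALED_SCORE]].mean())
print(summary, "| mutants:", total_mutants, "| bugs:", total_bugs)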
Example #11
    def test_mutate_linked_list_module(self):
        project_root = Path(
            __file__).parent.parent.parent / "dataset/linked_list_clean"
        module_under_test_path = project_root / "core" / "ll.py"

        trace_reader = TraceReader(project_root)
        not_failing_node_ids = trace_reader.get_not_failing_node_ids(
            module_under_test_path)

        killed, total = killed_mutants(
            project_root=str(project_root),
            module_under_test_path=str(module_under_test_path),
            not_failing_node_ids=not_failing_node_ids,
            timeout=None)
        s = set()
        for m in killed.values():
            s.update(m)
        self.assertEqual(46, total)
        self.assertEqual(30, len(s))
def generic_experiment_size(project_root,
                            module_under_test_path,
                            scoring_functions,
                            test_suite_sizes_count=30,
                            test_suite_sizes=None,
                            max_trace_size=10,
                            coverage_metrics=None,
                            node_ids=None,
                            support=100):
    logger.debug("Running mutation experiment for {module}",
                 module=module_under_test_path)

    if coverage_metrics is None:
        coverage_metrics = DEFAULT_METRICS
    cfg = ProjectCFG.create_from_path(project_root,
                                      exclude_folders=COMMON_EXCLUDE)
    if node_ids is None:
        trace_reader = TraceReader(project_root)
        node_ids, paths = trace_reader.get_traces_for(module_under_test_path)
    total_cases = len(node_ids)
    if test_suite_sizes is None:
        if total_cases < test_suite_sizes_count:
            test_suite_sizes_count = total_cases
        test_suite_sizes = np.arange(test_suite_sizes_count) + 1
        test_suite_sizes = [
            int(c * total_cases / test_suite_sizes_count)
            for c in test_suite_sizes
        ]

    logger.debug("Testing test suites of sizes {ss}", ss=test_suite_sizes)
    generator = SuiteGenerator(project_root,
                               project_root,
                               COMMON_EXCLUDE,
                               max_trace_size=max_trace_size)
    points = []
    no_suites_created_count_max = 3
    for metric in coverage_metrics:
        no_suites_created_count = 0
        for sub_test_suites_size in test_suite_sizes:
            logger.debug(
                "Test suite size: {sub_test_suites_size}, metric: {metric}",
                sub_test_suites_size=sub_test_suites_size,
                metric=metric)

            suites = generator.fix_sized_suites(
                module_under_test_path=module_under_test_path,
                coverage_metric=metric,
                exact_size=sub_test_suites_size,
                n=support,
                check_unique_items_covered=False,
                test_cases=node_ids)
            if not suites:
                no_suites_created_count += 1
            else:
                no_suites_created_count = 0
            if no_suites_created_count > no_suites_created_count_max:
                break

            for suite in suites:
                scores = [
                    scoring_function(suite)
                    for scoring_function in scoring_functions
                ]
                point = (len(suite.test_cases), str(metric), *scores,
                         suite.coverage)
                points.append(point)
    return points
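A standalone worked sketch of the default size computation in generic_experiment_size: with the 19 linked-list test cases above and the default test_suite_sizes_count of 30, the count clamps to 19 and the resulting sizes are simply 1 through 19.

# Standalone illustration of the default test-suite-size list; needs only numpy.
import numpy as np

total_cases = 19                 # e.g. the 19 traced linked-list test cases above
test_suite_sizes_count = 30      # default
if total_cases < test_suite_sizes_count:
    test_suite_sizes_count = total_cases
test_suite_sizes = [int(c * total_cases / test_suite_sizes_count)
                    for c in np.arange(test_suite_sizes_count) + 1]
print(test_suite_sizes)          # [1, 2, 3, ..., 19]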
class DefUsePairsCoverage(Coverage):
    metrics = {
        CoverageMetric.ALL_PAIRS, CoverageMetric.M_ONLY,
        CoverageMetric.IC_ONLY, CoverageMetric.IM_ONLY,
        CoverageMetric.M_AND_IM, CoverageMetric.M_AND_IC,
        CoverageMetric.IM_AND_IC
    }
    file_name_col = "File name"
    file_path_col = "File path"
    coverage_col = "Coverage"

    def __init__(self,
                 trace_root,
                 project_root,
                 exclude_folders=None,
                 max_trace_size=None):
        self.project_cfg = ProjectCFG.create_from_path(
            project_root,
            exclude_folders=exclude_folders,
            use_cached_if_possible=True)
        self.max_trace_size = max_trace_size
        self.trace_root = trace_root
        self.trace_reader = TraceReader(trace_root)
        self.vi = VarIndexFactory.new_py_index(project_root, trace_root)
        self.cppvi = VarIndexFactory.new_cpp_index(project_root, trace_root)

    def report(self, of_type=CoverageMetric.ALL_PAIRS):
        report = {}

        for module_path in self.trace_reader.files_mapping.keys():
            covered_pairs_per_test_case = self.covered_items_of(
                module_path, of_type=of_type)
            covered_items_of_module = set()
            for s in covered_pairs_per_test_case.values():
                covered_items_of_module.update(s)
            total_pairs = self.total_items_of(module_path, of_type=of_type)
            report[module_path] = percent(covered_items_of_module, total_pairs)

        report = pd.DataFrame.from_dict({self.coverage_col: report},
                                        orient="columns")
        return report

    def group_items_by_key(self, items, key):
        d = defaultdict(list)
        if isinstance(key, str):
            attr = key
            key = lambda x: getattr(x, attr)
        for item in items:
            k = key(item)
            d[k].append(item)
        return d

    def total_items_of(self,
                       module_path,
                       of_type=CoverageMetric.ALL_PAIRS) -> Set:
        module = self.project_cfg.module_cfgs.get(module_path)
        if not module:
            return set()
        if of_type == CoverageMetric.M_ONLY:
            return module.intramethod_pairs
        elif of_type == CoverageMetric.IM_ONLY:
            return module.intermethod_pairs
        elif of_type == CoverageMetric.IC_ONLY:
            return module.interclass_pairs
        elif of_type == CoverageMetric.M_AND_IM:
            return module.intramethod_pairs | module.intermethod_pairs
        elif of_type == CoverageMetric.M_AND_IC:
            return module.intramethod_pairs | module.interclass_pairs
        elif of_type == CoverageMetric.IM_AND_IC:
            return module.intermethod_pairs | module.interclass_pairs
        elif of_type == CoverageMetric.ALL_PAIRS:
            return module.intramethod_pairs | module.intermethod_pairs | module.interclass_pairs
        else:
            raise ValueError(
                "Unknown coverage metric {} in parameter 'of_type'".format(
                    of_type))

    def covered_items_of(self,
                         module_path,
                         of_type=CoverageMetric.ALL_PAIRS,
                         selected_node_ids=None) -> dict:
        data = {}
        total_items = self.total_items_of(module_path, of_type=of_type)
        node_ids, traces_paths = self.trace_reader.get_traces_for(
            module_path, selected_node_ids=selected_node_ids)
        module_name = os.path.basename(module_path)
        desc = "Finding def-use pairs of type {} for module {}".format(
            of_type.name, module_name)
        for test_case, trace_file_path in tqdm(zip(node_ids, traces_paths),
                                               total=len(node_ids),
                                               desc=desc,
                                               unit="test_case"):
            np_array, fsize = read_as_np_array(trace_file_path,
                                               max_size_mb=self.max_trace_size)
            if np_array is None:
                continue

            scopes = read_scopes_for_trace_file(trace_file_path)
            if not scopes:
                continue

            if of_type == CoverageMetric.M_ONLY:
                mp = analyze_trace_w_index(trace_file_path, self.cppvi)
                data[test_case] = rename_vars(mp)
            elif of_type == CoverageMetric.IM_ONLY:
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(imp)
            elif of_type == CoverageMetric.IC_ONLY:
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(icp)
            elif of_type == CoverageMetric.M_AND_IM:
                mp = analyze_trace_w_index(trace_file_path, self.cppvi)
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(mp) | rename_vars(imp)
            elif of_type == CoverageMetric.M_AND_IC:
                mp = analyze_trace_w_index(trace_file_path, self.cppvi)
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(mp) | rename_vars(icp)
            elif of_type == CoverageMetric.IM_AND_IC:
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(imp) | rename_vars(icp)
            elif of_type == CoverageMetric.ALL_PAIRS:
                mp = analyze_trace_w_index(trace_file_path, self.cppvi)
                imp, icp = analyze(trace_file_path, self.vi, scopes)
                data[test_case] = rename_vars(mp) | rename_vars(
                    imp) | rename_vars(icp)
            else:
                raise ValueError(
                    "Unknown coverage metric {} in parameter 'of_type'".format(
                        of_type))
            data[test_case] = data[test_case].intersection(total_items)
        return data
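A hedged usage sketch of DefUsePairsCoverage: the constructor arguments reuse fixture names from the tests above and the chosen metric is an assumption.

# Hypothetical usage; paths reuse the fixture names from the tests above.
coverage = DefUsePairsCoverage(trace_root=trace_root,
                               project_root=CLEAN_LINKED_LIST_ROOT,
                               exclude_folders=["venv"],
                               max_trace_size=10)
report = coverage.report(of_type=CoverageMetric.M_AND_IM)  # one coverage row per traced module
print(report)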
                if len(failing_on_assertion_node_ids) > 0:
                    repo.status = STATUS_NO_MODULE_CFG
                    repo.save()
                    repo_stat.mark_repo_as_good(manager)

                    logger.warning("Found node ids which failed on assertion!")
                    graphs_path = graphs_path_parent
                    module_under_test_path = manager.test_module_path
                    logger.info("Running the experiment for module {p}",
                                p=module_under_test_path)
                    st = time()
                    buggy_project.run_command(
                        f"python3 {thorough_path} -t --trace_dir={buggy_project.path}",
                        extra_requirements=extra_requirements)
                    elapsed_tracing = int(time() - st)
                    trace_reader = TraceReader(buggy_project.path)
                    module_under_test_absolute = os.path.join(
                        buggy_project.path, module_under_test_path)
                    covering_node_ids, paths = trace_reader.get_traces_for(
                        module_under_test_absolute)
                    covering_node_ids = set(covering_node_ids)
                    bad_node_ids = after - failing_on_assertion_node_ids
                    covering_node_ids -= bad_node_ids
                    not_failing_node_ids = covering_node_ids - failing_on_assertion_node_ids
                    not_failing_node_ids_as_params = " ".join(
                        not_failing_node_ids)
                    return_code = buggy_project.run_command(
                        f"pytest {not_failing_node_ids_as_params}")

                    if return_code == 0:
                        logger.warning("Test cases can be run individually")
    out_folder = maybe_expand(args.out)

    module = args.module
    timeout = args.timeout
    revealing_node_ids = args.revealing_node_ids

    exclude_folders_tracing = COMMON_EXCLUDE
    thorough.run_tests(project_root,
                       trace_root,
                       exclude_folders_tracing=exclude_folders_tracing,
                       exclude_folders_collection=None,
                       show_time_per_test=False,
                       quiet=False,
                       deselect_tests=None)

    trace_reader = TraceReader(trace_root)
    failed_node_ids = trace_reader.read_failed_test_cases()

    module = maybe_expand(module)
    coverage_metrics = (CoverageMetric.STATEMENT, CoverageMetric.BRANCH,
                        CoverageMetric.ALL_PAIRS)
    # coverage_metrics = (CoverageMetric.ALL_PAIRS, CoverageMetric.ALL_C_USES, CoverageMetric.ALL_P_USES)

    new_module_path = module_path(out_folder, project_root, module)
    shutil.copy(module, new_module_path)

    # fixed size
    df, total_mutants_count, total_bugs_count = combined_experiment_fixed_size(
        project_root=project_root,
        module_under_test_path=module,
        revealing_node_ids=revealing_node_ids,
Example #16
    def create(project_root, traces_root):
        trace_reader = TraceReader(traces_root)
        return VarIndex(read_du_index(project_root),
                        trace_reader.files_mapping.index)
    max_trace_size = args.max_trace_size
    graphs_folder = maybe_expand(args.graphs_folder)

    exclude_folders_tracing = ["dataset", "venv", "tests", "test"]
    thorough.run_tests(
        project_root,
        trace_root,
        exclude_folders_tracing=exclude_folders_tracing,
        exclude_folders_collection=None,
        show_time_per_test=False,
        quiet=False,
        deselect_tests=None,
        # node_ids=args.node_ids
    )

    trace_reader = TraceReader(trace_root)
    failed_test_cases = trace_reader.read_failed_test_cases()

    print("Failed cases:", failed_test_cases)
    if len(failed_test_cases) == 0:
        raise ValueError("No test cases failed")

    def experiment(module):
        df_fixed_size, total_bugs = run_real_bugs_experiment_fixed_size(
            project_root, module, node_ids, test_suite_sizes_count,
            max_trace_size=max_trace_size)

        plot_type = "fixed_size_box_plot"
        title = f"{Path(module).name}, {total_bugs} bugs"

        image_p = image_path(graphs_folder, project_root, module, plot_type)