Exemple #1
0
def main():
    """Run the mining -> predicting -> evaluating example on ``commons_lang``.

    The function extracts ``input/commons_lang.zip`` next to this file, mines
    the extracted repository starting from 2020-10-01, runs the co-change
    predictor on a per-commit basis and finally prints the result of
    ``precision_recall_and_f1_score_of_strategy`` for the
    ``links_commits_based_cochanged`` table.
    """
    # Resolve to an absolute directory first: ``os.path.dirname(__file__)`` can
    # be an empty string when the script is launched with a bare relative file
    # name, which would turn the paths below into "/output" and "/input".
    example_path = os.path.dirname(os.path.abspath(__file__))
    output_dir = f"{example_path}/output"
    input_dir = f"{example_path}/input"

    # 0. Prepare target file and tool
    # The archive is only needed for the extraction, so it is closed right
    # after instead of being held open for the whole pipeline.
    with zipfile.ZipFile(f"{input_dir}/commons_lang.zip", "r") as target_zip:
        target_zip.extractall(input_dir)

    # 1. Mining
    repository_path = f"{input_dir}/commons_lang"
    miner = DataMiner(output_dir, repository_path)
    miner.mining(start_date=datetime(2020, 10, 1))

    # 2. Predicting
    database_path = f"{output_dir}/commons_lang.db"
    TraceabilityPredictor(database_path).run(
        LinkStrategy.COCHANGE, LinkBase.FOR_COMMITS
    )

    # 3. Evaluating
    ground_truth_csv_path = f"{input_dir}/gt_commons_lang.csv"
    evaluator = LinkEvaluator(database_path, ground_truth_csv_path)
    print(
        evaluator.precision_recall_and_f1_score_of_strategy(
            "links_commits_based_cochanged"
        )
    )
Exemple #2
0
def print_apriori_report() -> None:
    """Print the precision/recall/F1 result of both apriori strategies.

    One ``LinkEvaluator`` is built from the module-level ``path_to_db`` and
    ``path_to_csv``; the commit-based strategy is reported first, then the
    week-based one.
    """
    evaluator = LinkEvaluator(path_to_db, path_to_csv)
    for strategy_name in ("apriori_for_commits", "apriori_for_weeks"):
        scores = evaluator.precision_recall_and_f1_score_of_strategy(
            strategy_name)
        print(scores)
Exemple #3
0
def print_complete_report() -> None:
    """Print the precision/recall/F1 result of every link table, labelled.

    Each line is the table name followed by ``": "`` and the value returned by
    ``precision_recall_and_f1_score_of_strategy`` for that table. Commit-based
    tables are printed before week-based ones.
    """
    evaluator = LinkEvaluator(path_to_db, path_to_csv)
    table_names = (
        "links_commits_based_apriori",
        "links_commits_based_cochanged",
        "links_commits_based_cocreated",
        "links_weeks_based_apriori",
        "links_weeks_based_cochanged",
        "links_weeks_based_cocreated",
    )
    for table_name in table_names:
        label = f"{table_name}: "
        scores = evaluator.precision_recall_and_f1_score_of_strategy(
            table_name)
        print(label, scores)
    return None
Exemple #4
0
def draw_scatter_figure_for_coordinates_for_methods_and_commits_type():
    """Show a 3D scatter plot of the method/commit coordinates.

    The coordinates come from ``LinkEvaluator.coordinates_for_methods_commits``.
    ``coordinates_for_test`` is drawn as red dots and ``coordinates_for_tested``
    as blue triangles; the three lookup tables of the coordinates object are
    printed to stdout before the figure is shown.
    """
    figure = plt.figure(num=1, figsize=(15, 15))
    axes = figure.add_subplot(111, projection="3d")

    def draw_3d_scatter(
        by_3d_coordinates: List[Tuple[int, int, int]],
        color: str,
        marker: str,
        size: int,
    ):
        """Scatter the given (x, y, z) triples onto the shared 3D axes."""
        # Unpack once so every entry is checked to be a triple, then split
        # the triples into one column per axis.
        triples = [(x, y, z) for x, y, z in by_3d_coordinates]
        x_values = np.array([triple[0] for triple in triples])
        y_values = np.array([triple[1] for triple in triples])
        z_values = np.array([triple[2] for triple in triples])
        axes.scatter(x_values, y_values, z_values,
                     c=color, marker=marker, s=size)

    evaluator = LinkEvaluator(path_to_db, path_to_csv)
    coordinates = evaluator.coordinates_for_methods_commits()

    draw_3d_scatter(coordinates.coordinates_for_test, "r", ".", 1)
    draw_3d_scatter(coordinates.coordinates_for_tested, "b", "^", 1)

    # Dump the lookup tables of the coordinates object for reference.
    print(coordinates.package_name_x_table)
    print(coordinates.commit_hash_y_table)
    print(coordinates.change_type_z_table)

    plt.title("sale")
    plt.xlabel("method_id")
    plt.ylabel("commit_id")
    plt.show()
Exemple #5
0
def draw_3D_for_co_changed():
    """Show a 3D scatter plot comparing predicted and ground-truth links.

    The links are read from
    ``LinkEvaluator.raw_links_for_predicated_and_ground_truth`` for the
    ``apriori_for_weeks`` strategy and drawn in three layers on one 3D axes:

    * all predicted links      - blue dots, size 1
    * valid predicted links    - red triangles, size 10
    * ground-truth links       - green triangles, size 20

    NOTE(review): the function name mentions "co_changed" while the strategy
    queried is "apriori_for_weeks" - confirm which one is intended.
    """
    evaluator = LinkEvaluator(path_to_db, path_to_csv)
    evaluation = evaluator.raw_links_for_predicated_and_ground_truth(
        "apriori_for_weeks")

    # The previous version also built a per-ground-truth-link list of
    # predicted values plus a link count; neither was ever read, so that
    # dead computation has been removed.
    ground_truth_links = evaluation.ground_truth_links
    pd_links_dict = evaluation.predict_links
    valid_pd_links_dict = evaluation.valid_predict_links

    fig = plt.figure(num=1, figsize=(15, 15))
    ax1 = fig.add_subplot(111, projection="3d")
    __draw_scatter_from_dict(ax1, pd_links_dict, "b", ".", 1)
    __draw_scatter_from_dict(ax1, valid_pd_links_dict, "r", "^", 10)
    __draw_scatter_from_dict(ax1, ground_truth_links, "g", "^", 20)
    plt.title("sale")
    plt.xlabel("tested_id")
    plt.ylabel("test_id")
    plt.show()
Exemple #6
0
def theory_max_precision():
    """Print how many (tested, test) pairs a greedy set of commits yields.

    Steps, as implemented below:

    1. Group the co-changed method pairs reported by
       ``LinkEvaluator.co_changed_commits`` by commit hash.
    2. Read a per-commit count (second-to-last column) from
       ``table_for_ground_truth_occurred_commits.csv`` and keep only commits
       whose count lies in ``3..14``.
    3. Walk the kept commits by ascending count and select every commit that
       contributes at least one method pair not covered by earlier ones.
    4. For each selected commit, query the database for the combinations of
       changed methods under the main source tree and changed methods under
       the test source tree, and count the distinct pairs whose test id is a
       key of ``report.test_changed_commits``.

    The final count is printed to stdout; nothing is returned.
    """
    evaluator = LinkEvaluator(path_to_db, path_to_csv)
    report = evaluator.co_changed_commits()
    co_changes_commits = report.co_changes_commits

    # commit hash -> method pairs that co-changed in that commit
    commit_method_pairs: Dict[str, Set[Tuple[int, int]]] = dict()
    for method_pair, commit_ids in co_changes_commits.items():
        for commit_id in commit_ids:
            commit_hash = report.from_commit_id_to_hash(commit_id)
            commit_method_pairs.setdefault(commit_hash, set()).add(method_pair)

    path_to_gt_count = f"{path_to_tmp}/table_for_ground_truth_occurred_commits.csv"
    csv_data = pd.read_csv(path_to_gt_count, index_col=0)

    # commit hash -> count taken from the CSV, restricted to 3..14.
    # ``commit_method_pairs`` already holds every distinct hash in
    # first-encounter order, so each hash is looked up exactly once here.
    commits_count: Dict[str, int] = dict()
    for commit_hash in commit_method_pairs:
        # ``.iloc`` makes the positional access explicit; plain ``[-2]`` on a
        # label-indexed Series is deprecated positional fallback in pandas.
        commit_count = csv_data.loc[commit_hash].iloc[-2]
        if commit_count > 14 or commit_count < 3:
            continue
        commits_count[commit_hash] = commit_count

    # Greedy selection: smallest commits first, keep a commit only when it
    # adds a method pair that is not covered yet.
    sorted_commits = sorted(commits_count.keys(),
                            key=lambda hash_val: (commits_count[hash_val]))
    min_commits = set()
    cur_methods_scope = set()
    for commit_hash in sorted_commits:
        method_pairs = commit_method_pairs[commit_hash]
        if cur_methods_scope.issuperset(method_pairs):
            continue
        cur_methods_scope.update(method_pairs)
        min_commits.add(commit_hash)

    db_connection = sqlite3.connect(path_to_db)

    def find_all_links_in_commits(commit_hash: str) -> Set[Tuple[int, int]]:
        """Return the distinct (tested_id, test_id) pairs changed in a commit."""
        db_cursor = db_connection.cursor()
        exe_rst = db_cursor.execute(
            """
            WITH test_methods AS (
                SELECT id FROM git_methods
                WHERE file_path LIKE 'src/test/java/org/apache/commons/lang3%'
            ), tested_functions AS (
                SELECT id FROM git_methods
                WHERE file_path LIKE 'src/main/java/org/apache/commons/lang3%'
            ), changes_test_in_commits AS (
                SELECT target_method_id AS test_id FROM  git_changes
                WHERE commit_hash = :commit_hash
                AND target_method_id IN test_methods
            ), changes_tested_in_commits AS (
                SELECT target_method_id AS tested_id FROM  git_changes
                WHERE commit_hash = :commit_hash
                AND target_method_id IN tested_functions
            )
            SELECT DISTINCT tested_id, test_id
            FROM changes_test_in_commits
            LEFT OUTER JOIN changes_tested_in_commits
        """,
            {"commit_hash": commit_hash},
        )
        # The LEFT OUTER JOIN yields a NULL ``tested_id`` when the commit
        # touched test methods only; such rows are not a pair and would make
        # ``int(None)`` raise, so they are skipped.
        return {(int(tested_id), int(test_id))
                for tested_id, test_id in exe_rst.fetchall()
                if tested_id is not None and test_id is not None}

    predicated_pairs = set()
    test_ids = set(report.test_changed_commits.keys())
    try:
        for commit_hash in min_commits:
            for pair in find_all_links_in_commits(commit_hash):
                # pair is (tested_id, test_id); keep known test methods only.
                if pair[1] in test_ids:
                    predicated_pairs.add(pair)
    finally:
        # The connection used to be leaked; release it even when a query fails.
        db_connection.close()

    print(len(predicated_pairs))
Exemple #7
0
def draw_2d_scatter_for_commits_distributions():
    """Show five stacked scatter plots of per-commit change counts.

    One panel each is drawn for files, classes, methods, test and tested
    coordinates obtained from ``LinkEvaluator``. In every panel, points with
    change type 1 are red, type 2 blue and type 3 green.
    """

    def _scatter_by_change_type(
        axes: Axes,
        by_3d_coordinates: List[Tuple[int, int, int]],
        title: str,
        y_max: Optional[int] = None,
    ) -> None:
        """Scatter (commit, count) points on ``axes``, coloured by type.

        Entries whose count exceeds ``y_max`` (when given) are reported on
        stdout and left out; entries with a type other than 1, 2 or 3 are
        silently dropped.
        """
        # change-type code -> (commit ids, change counts)
        series = {1: ([], []), 2: ([], []), 3: ([], [])}
        for change_commit, change_count, change_type in by_3d_coordinates:
            too_large = y_max is not None and change_count > y_max
            if too_large:
                print(
                    f"IGNORE COMMITS({change_commit}), SIZE ({change_count}), TYPE({change_type})."
                )
                continue
            for type_code, (commit_ids, counts) in series.items():
                if change_type == type_code:
                    commit_ids.append(change_commit)
                    counts.append(change_count)
                    break

        # Draw order and colours: 1 -> red, 2 -> blue, 3 -> green.
        for type_code, color in ((1, "r"), (2, "b"), (3, "g")):
            commit_ids, counts = series[type_code]
            axes.scatter(np.array(commit_ids),
                         np.array(counts),
                         c=color,
                         marker=".",
                         s=1)

        axes.set_xlabel("commits_id(chronological)")
        axes.set_ylabel("number of changed (R: ADD, B: MODIFY, G: RENAME)")
        axes.set_title(title)
        return None

    report = LinkEvaluator(path_to_db, path_to_csv)
    # All coordinates are fetched up front, before the figure is created:
    # (subplot position, coordinates, panel title, largest count drawn)
    panels = [
        (511,
         report.coordinates_for_files_changes_distribution_of_commits()
         .commits_count_coordinates,
         "changes for files", 5),
        (512,
         report.coordinates_for_classes_changes_distribution_of_commits()
         .commits_count_coordinates,
         "changes for classes", 5),
        (513,
         report.coordinates_for_methods_changes_distribution_of_commits()
         .commits_count_coordinates,
         "changes for methods", 25),
        (514,
         report.coordinates_for_test_changes_distribution_of_commits()
         .commits_count_coordinates,
         "changes for test", 25),
        (515,
         report.coordinates_for_tested_changes_distribution_of_commits()
         .commits_count_coordinates,
         "changes for tested", 25),
    ]

    figure = plt.figure(num=5, figsize=(25, 25))
    for position, coordinates, panel_title, count_limit in panels:
        _scatter_by_change_type(figure.add_subplot(position), coordinates,
                                panel_title, count_limit)
    plt.show()
Exemple #8
0
def output_to_csv() -> None:
    """Build a ``LinkEvaluator`` and call its ``output_predict_to_csv``.

    The evaluator is created from the module-level ``path_to_db`` and
    ``path_to_csv``.
    """
    LinkEvaluator(path_to_db, path_to_csv).output_predict_to_csv()