def test_aig_file_vs_blame_degrees_table(self) -> None:
        """
        Check the LaTeX booktabs output of the author blame vs. file degrees
        table.

        The table is generated for the first ``xz`` case study of the
        ``test_diff_correlation_overview_table`` paper config and compared
        verbatim against the expected LaTeX source.
        """
        expected_table = r"""\begin{tabular}{lrrrrr}
\toprule
{} &  blame\_num\_commits &  blame\_node\_degree &  author\_diff &  file\_num\_commits &  file\_node\_degree \\
author         &                    &                    &              &                   &                   \\
\midrule
Alexey Tourbin &                NaN &                NaN &          NaN &                 1 &                 2 \\
Ben Boeckel    &                NaN &                NaN &          NaN &                 1 &                 2 \\
Jim Meyering   &                NaN &                NaN &          NaN &                 1 &                 2 \\
Lasse Collin   &              124.0 &                0.0 &          0.0 &               479 &                 6 \\
\bottomrule
\end{tabular}
"""

        # Select the paper config used by this test before loading it.
        paper_cfg = vara_cfg()["paper_config"]
        paper_cfg["current_config"] = "test_diff_correlation_overview_table"
        initialize_projects()
        load_paper_config()

        table_config = TableConfig.from_kwargs(view=False)
        xz_case_study = get_loaded_paper_config().get_case_studies("xz")[0]
        table = AuthorBlameVsFileDegreesTable(
            table_config, case_study=xz_case_study
        )

        # latex booktabs is default format
        table_str = table.tabulate(TableFormat.LATEX_BOOKTABS, False)

        self.assertEqual(expected_table, table_str)
 def value_to_string(
     value: tp.Union[CaseStudy, tp.List[CaseStudy]]
 ) -> tp.Union[str, tp.List[str]]:
     """
     Convert a case study, or a list of case studies, to its string form.

     Args:
         value: a single case study or a list of case studies

     Returns:
         ``"<project_name>_<version>"`` for a single case study; for a list,
         ``"all"`` if it equals the list of all case studies of the loaded
         paper config, otherwise the list of the individual string forms
     """

     def _label(case_study: CaseStudy) -> str:
         return f"{case_study.project_name}_{case_study.version}"

     # Single case studies never need the paper config.
     if not isinstance(value, tp.List):
         return _label(value)

     if value == get_loaded_paper_config().get_all_case_studies():
         return "all"
     return [_label(case_study) for case_study in value]
    def plot(self, view_mode: bool) -> None:
        """
        Draw the per-project distribution of interacting authors per commit.

        For every case study of the loaded paper config that has a processed
        ``BlameReport`` revision, the degree of each commit node in the
        commit-author interaction graph is divided by the number of nodes
        with a truthy ``author`` attribute. The values are rendered as a
        violin plot with an overlaid strip plot, one violin per project.

        Args:
            view_mode: not used by this implementation
        """
        y_column = "# Interacting Authors"

        rows: tp.List[tp.Dict[str, tp.Any]] = []
        plotted_projects: tp.List[str] = []
        for case_study in get_loaded_paper_config().get_all_case_studies():
            project_name = case_study.project_name
            revision = newest_processed_revision_for_case_study(
                case_study, BlameReport)
            if not revision:
                continue

            blame_graph = create_blame_interaction_graph(
                project_name, revision)
            caig = blame_graph.commit_author_interaction_graph(
                outgoing_interactions=True, incoming_interactions=True)

            num_authors = sum(
                1 for node in caig.nodes if caig.nodes[node]["author"])

            project_rows: tp.List[tp.Dict[str, tp.Any]] = []
            for node in caig.nodes:
                commit = tp.cast(CAIGNodeAttrs, caig.nodes[node])["commit"]
                if not commit:
                    continue
                project_rows.append({
                    "Project": project_name,
                    "commit": commit.commit_hash,
                    y_column: caig.degree(node) / num_authors
                })

            # A project only shows up in the plot if it has commit nodes.
            if project_rows:
                plotted_projects.append(project_name)
                rows.extend(project_rows)

        data = pd.DataFrame(rows)
        ax = sns.violinplot(
            x="Project",
            y=y_column,
            data=data,
            order=sorted(plotted_projects),
            inner=None,
            linewidth=1,
            color=".95",
        )
        sns.stripplot(
            x="Project",
            y=y_column,
            data=data,
            order=sorted(plotted_projects),
            alpha=.25,
            size=3,
        )
        ax.set_ylim(-0.1, 1.1)
        ax.set_aspect(0.3 / ax.get_data_ratio())
        ax.tick_params(axis='x', labelrotation=45)
        ax.set_xlabel(None)
 def string_to_value(
     str_value: tp.Union[str, tp.List[str]]
 ) -> tp.Union[CaseStudy, tp.List[CaseStudy]]:
     """
     Resolve a case study name, or a list of names, via the loaded paper
     config.

     Args:
         str_value: a single name, the literal ``"all"``, or a list of names

     Returns:
         for a list, all case studies found for the given names in order;
         for ``"all"``, all case studies of the paper config; otherwise the
         first case study found for the given name
     """
     paper_config = get_loaded_paper_config()

     if not isinstance(str_value, tp.List):
         if str_value == "all":
             return paper_config.get_all_case_studies()
         return paper_config.get_case_studies(str_value)[0]

     selected: tp.List[CaseStudy] = []
     for cs_name in str_value:
         selected.extend(paper_config.get_case_studies(cs_name))
     return selected
Exemple #5
0
    def plot_file_name(self, filetype: str) -> str:
        """
        Build the name of the file this plot is stored to on save.

        The name is composed of the paper config's directory name, the plot
        name, and the values of the ``var_x`` and ``var_y`` plot arguments.

        Args:
            filetype: the file type for the plot

        Returns:
            the file name the plot will be stored to
        """
        paper_config_name = get_loaded_paper_config().path.name
        x_name, y_name = (
            self.plot_kwargs[axis].value for axis in ('var_x', 'var_y')
        )
        stem = f"{paper_config_name}_{self.name}_{x_name}_vs_{y_name}"
        return f"{stem}.{filetype}"
Exemple #6
0
    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
        """
        Render the aggregated time measurements of all case studies as table.

        One row is produced for every ``TimeReportAggregate`` file found for
        the case studies of the loaded paper config. Each row shows mean and
        standard deviation of the wall clock time and of the context
        switches. Rows are sorted and indexed by binary and experiment.

        Args:
            table_format: the format the table is rendered in
            wrap_table: forwarded unchanged to ``dataframe_to_table``

        Returns:
            the rendered table as produced by ``dataframe_to_table``
        """
        index_columns = ["Binary", "Experiment"]
        value_columns = ["Runtime Mean (Std)", "Ctx-Switches Mean (Std)"]

        # Rows are collected first and turned into a frame once:
        # ``DataFrame.append`` no longer exists in pandas >= 2.0 and copied
        # the whole frame on every call.
        rows: tp.List[tp.Dict[str, tp.Any]] = []
        for case_study in get_loaded_paper_config().get_all_case_studies():
            report_files = get_processed_revisions_files(
                case_study.project_name, TimeReportAggregate,
                get_case_study_file_name_filter(case_study), False)

            for report_file in report_files:
                time_aggregated = TimeReportAggregate(report_file)
                report_name = time_aggregated.filename

                wall_clock_times = time_aggregated.measurements_wall_clock_time
                ctx_switches = time_aggregated.measurements_ctx_switches

                mean_runtime = np.mean(wall_clock_times)
                std_runtime = np.std(wall_clock_times)
                mean_ctx = np.mean(ctx_switches)
                std_ctx = np.std(ctx_switches)

                rows.append({
                    "Binary": report_name.binary_name,
                    "Experiment": report_name.experiment_shorthand,
                    "Runtime Mean (Std)":
                    f"{mean_runtime:.2f} ({std_runtime:.2f})",
                    "Ctx-Switches Mean (Std)":
                    f"{mean_ctx:.2f} ({std_ctx:.2f})"
                })

        # Explicit columns keep sorting/indexing valid without any reports.
        df = pd.DataFrame(rows, columns=index_columns + value_columns)
        df.sort_values(index_columns, inplace=True)
        df.set_index(index_columns, inplace=True)

        kwargs: tp.Dict[str, tp.Any] = {}
        if table_format.is_latex():
            kwargs["column_format"] = "llrr"

        return dataframe_to_table(df,
                                  table_format,
                                  wrap_table,
                                  wrap_landscape=True,
                                  **kwargs)
    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
        """
        Render an overview table of all case studies of the paper config.

        Each case study contributes one row, indexed by project name, with
        the project's domain, lines of code, number of commits and number of
        authors. The numbers are computed for the newest revision of the
        case study, or for ``HEAD`` if the case study has no revisions. A
        ``Revision`` column is filled only when such a revision exists.

        Args:
            table_format: the format the table is rendered in
            wrap_table: forwarded unchanged to ``dataframe_to_table``

        Returns:
            the rendered table as produced by ``dataframe_to_table``
        """
        case_studies = get_loaded_paper_config().get_all_case_studies()

        cs_data: tp.List[pd.DataFrame] = []
        for case_study in case_studies:
            project_name = case_study.project_name
            commit_map = get_commit_map(project_name)
            project_cls = get_project_cls_by_name(project_name)
            project_repo = get_local_project_git(project_name)
            # Drops the last five characters of the repository path;
            # presumably the trailing ".git/" component -- TODO confirm.
            project_path = project_repo.path[:-5]
            project_git = git["-C", project_path]

            # Newest revision according to the commit map. ``default=None``
            # avoids an IndexError for case studies without revisions, so
            # the "HEAD" fallback below is actually reachable.
            revision = max(
                case_study.revisions, key=commit_map.time_id, default=None
            )
            rev_range = revision.hash if revision else "HEAD"

            # Capitalize only the first character; safe for empty strings.
            domain = str(project_cls.DOMAIN)
            domain = domain[:1].upper() + domain[1:]

            cs_dict = {
                project_name: {
                    "Domain":
                        domain,
                    "LOC":
                        calc_repo_loc(project_repo, rev_range),
                    "Commits":
                        int(project_git("rev-list", "--count", rev_range)),
                    "Authors":
                        len(
                            project_git("shortlog", "-s",
                                        rev_range).splitlines()
                        )
                }
            }
            if revision:
                cs_dict[project_name]["Revision"] = revision.short_hash

            cs_data.append(pd.DataFrame.from_dict(cs_dict, orient="index"))

        df = pd.concat(cs_data).sort_index()

        kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
        if table_format.is_latex():
            kwargs["multicolumn_format"] = "c"
            kwargs["multirow"] = True

        return dataframe_to_table(
            df, table_format, wrap_table, wrap_landscape=True, **kwargs
        )
Exemple #8
0
    def test_cli_option_converter(self):
        """
        Test whether CLI option conversion works correctly.

        A plot artefact is created from a ``CaseStudyOverviewGenerator`` and
        its dictionary form is checked for the converted ``case_study`` and
        ``report_type`` entries.
        """
        # setup config
        paper_cfg = vara_cfg()['paper_config']
        paper_cfg['current_config'] = "test_artefacts_driver"
        load_paper_config()
        save_config()

        plot_config = PlotConfig.from_kwargs(view=False)
        xz_case_study = get_loaded_paper_config().get_case_studies("xz")[0]
        plot_generator = CaseStudyOverviewGenerator(
            plot_config, report_type=EmptyReport, case_study=xz_case_study
        )

        artefact_dict = PlotArtefact.from_generator(
            "CS Overview", plot_generator, CommonPlotOptions.from_kwargs()
        ).get_dict()

        expected_entries = (
            ("case_study", "xz_0"),
            ("report_type", "EmptyReport"),
        )
        for key, expected in expected_entries:
            self.assertEqual(expected, artefact_dict[key])
    def test_caig_metrics_table(self) -> None:
        """Tests the latex booktabs format for the caig metrics table."""
        vara_cfg()["paper_config"][
            "current_config"] = "test_diff_correlation_overview_table"
        initialize_projects()
        load_paper_config()

        # latex booktabs is default format
        table_str = CommitAuthorInteractionGraphMetricsTable(
            TableConfig.from_kwargs(view=False),
            case_study=get_loaded_paper_config().get_all_case_studies(
            )).tabulate(TableFormat.LATEX_BOOKTABS, False)

        self.assertEqual(
            r"""\begin{tabular}{lrrrrrrrrrrrrrr}
\toprule
{} & commits & authors & nodes & edges & \multicolumn{4}{c}{node degree} & \multicolumn{3}{c}{node out degree} & \multicolumn{3}{c}{node in degree} \\
{} &        mean & median & min & max &          median & min & max &         median & min & max \\
\midrule
\textbf{xz} &    1143 &      28 &   125 &    92 &        1.47 &    1.0 &   0 &  92 &             1.0 &   0 &   1 &            0.0 &   0 &  92 \\
\bottomrule
\end{tabular}
""", table_str)
    def test_one_case_study_latex_booktabs(self) -> None:
        """Tests the latex booktabs format for the code centrality metrics
        table."""
        vara_cfg()["paper_config"][
            "current_config"] = "test_diff_correlation_overview_table"
        initialize_projects()
        load_paper_config()

        # latex booktabs is default format
        table_str = TopCentralCodeCommitsTable(
            TableConfig.from_kwargs(view=False),
            case_study=get_loaded_paper_config().get_case_studies("xz")[0],
            num_commits=10).tabulate(TableFormat.LATEX_BOOKTABS, False)

        self.assertEqual(
            r"""\begin{table}
\centering
\caption{Top 10 Central Code Commits}
\begin{tabular}{lr}
\toprule
                                  commit &  centrality \\
\midrule
ef68dd4a92976276304de2aedfbe34ae91a86abb &          28 \\
57597d42ca1740ad506437be168d800a50f1a0ad &          16 \\
ea00545beace5b950f709ec21e46878e0f448678 &          16 \\
7f0a4c50f4a374c40acf4b86848f301ad1e82d34 &          15 \\
c15c42abb3c8c6e77c778ef06c97a4a10b8b5d00 &          15 \\
fa3ab0df8ae7a8a1ad55b52266dc0fd387458671 &          10 \\
1d924e584b146136989f48c13fff2632896efb3d &           9 \\
d8b41eedce486d400f701b757b7b5e4e32276618 &           8 \\
1b0ac0c53c761263e91e34195cb21dfdcfeac0bd &           6 \\
e0ea6737b03e83ccaff4514d00e31bb926f8f0f3 &           6 \\
\bottomrule
\end{tabular}
\end{table}
""", table_str)
    def plot(self, view_mode: bool) -> None:
        """
        Draw the per-project distribution of additional authors.

        For every case study of the loaded paper config that has a processed
        ``BlameReport`` revision, each author node of the file-based author
        interaction graph is compared against the blame-based one. The
        plotted value is the number of neighbors (successors and
        predecessors) the author has in the blame-based graph but not in the
        file-based graph. The values are rendered as a violin plot with an
        overlaid strip plot, one violin per project.

        Args:
            view_mode: not used by this implementation
        """

        def _neighbors(graph: tp.Any, node: tp.Any) -> tp.Set[tp.Any]:
            # Nodes connected to ``node`` by an edge in either direction.
            return set(graph.successors(node)).union(graph.predecessors(node))

        y_column = "# Additional Authors"

        per_project_data: tp.List[pd.DataFrame] = []
        plotted_projects: tp.List[str] = []
        for case_study in get_loaded_paper_config().get_all_case_studies():
            project_name = case_study.project_name
            revision = newest_processed_revision_for_case_study(
                case_study, BlameReport)
            if not revision:
                continue

            plotted_projects.append(project_name)

            blame_aig = create_blame_interaction_graph(
                project_name, revision).author_interaction_graph()
            file_aig = create_file_based_interaction_graph(
                project_name, revision).author_interaction_graph()

            author_rows: tp.List[tp.Dict[str, tp.Any]] = []
            for node in file_aig.nodes:
                node_attrs = tp.cast(AIGNodeAttrs, file_aig.nodes[node])

                # Authors missing from the blame graph have no neighbors.
                blame_neighbors: tp.Set[tp.Any] = (
                    _neighbors(blame_aig, node)
                    if blame_aig.has_node(node) else set()
                )
                file_neighbors = _neighbors(file_aig, node)

                author_rows.append({
                    "Project": project_name,
                    "author": f"{node_attrs['author']}",
                    y_column: len(blame_neighbors - file_neighbors)
                })

            per_project_data.append(
                pd.DataFrame(author_rows).set_index("author"))

        data = pd.concat(per_project_data)
        ax = sns.violinplot(
            x="Project",
            y=y_column,
            data=data,
            order=sorted(plotted_projects),
            inner=None,
            linewidth=1,
            color=".95",
        )
        sns.stripplot(
            x="Project",
            y=y_column,
            data=data,
            order=sorted(plotted_projects),
            alpha=.25,
            size=3,
        )
        ax.set_ylim(bottom=0, top=1.1 * data[y_column].max())
        ax.set_aspect(0.3 / ax.get_data_ratio())
        ax.tick_params(axis='x', labelrotation=45)
        ax.set_xlabel(None)