def _collect_cig_node_data(
    project_name: str, revision: FullCommitHash
) -> tp.List[tp.Dict[str, tp.Any]]:
    """
    Gather degree and churn information for the commit interaction graph.

    Nodes that belong to the uncommitted pseudo-commit, or whose commit the
    commit lookup helper cannot resolve, are left out of the result.

    Args:
        project_name: name of the project the graph is created for
        revision: revision the blame interaction graph is created for

    Returns:
        one dict per retained node with the keys ``commit_hash``, ``degree``
        and ``insertions``
    """
    churn_cfg = ChurnConfig.create_c_style_languages_config()
    graph = create_blame_interaction_graph(
        project_name, revision
    ).commit_interaction_graph()
    lookup_commit = create_commit_lookup_helper(project_name)
    project_repos = get_local_project_gits(project_name)

    def is_resolvable(pair: CommitRepoPair) -> bool:
        """Accept only real commits that the lookup helper can resolve."""
        if pair.commit_hash == UNCOMMITTED_COMMIT_HASH:
            return False
        return bool(lookup_commit(pair))

    collected: tp.List[tp.Dict[str, tp.Any]] = []
    for node_id in graph.nodes:
        pair = tp.cast(CIGNodeAttrs, graph.nodes[node_id])["commit"]
        if is_resolvable(pair):
            repo_path = Path(project_repos[pair.repository_name].path)
            _, insertions, _ = calc_commit_code_churn(
                repo_path, pair.commit_hash, churn_cfg
            )
            if insertions == 0:
                # A churn of zero is reported and then replaced by one.
                LOG.warning(f"Churn for commit {pair} is 0.")
                insertions = 1

            collected.append({
                "commit_hash": pair.commit_hash.hash,
                "degree": graph.degree(node_id),
                "insertions": insertions,
            })

    return collected
def plot(self, view_mode: bool) -> None:
    """
    Plot the number of commits against the number of interacting authors.

    One data point is produced per node of the blame-based author
    interaction graph of the newest processed revision of the case study.

    Args:
        view_mode: not used by this implementation

    Raises:
        PlotDataEmpty: if the case study has no processed revision
    """
    study = self.plot_kwargs["case_study"]
    project = study.project_name

    newest_revision = newest_processed_revision_for_case_study(
        study, BlameReport
    )
    if not newest_revision:
        raise PlotDataEmpty()

    graph = create_blame_interaction_graph(
        project, newest_revision
    ).author_interaction_graph()

    def to_row(node_id: tp.Any) -> tp.Dict[str, tp.Any]:
        """Turn a single graph node into a data frame row."""
        attrs = tp.cast(AIGNodeAttrs, graph.nodes[node_id])
        return {
            "project": project,
            "author": attrs["author"],
            "# Interacting authors": graph.degree(node_id),
            "# Commits": attrs["num_commits"],
        }

    rows: tp.List[tp.Dict[str, tp.Any]] = [
        to_row(node_id) for node_id in graph.nodes
    ]
    frame = pd.DataFrame(rows)
    multivariate_grid(
        "# Commits",
        "# Interacting authors",
        "project",
        frame,
        global_kde=False
    )
def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
    """
    Build a per-author table comparing the blame-based and the file-based
    author interaction graph of the newest processed revision.

    For every author of the blame-based graph the table lists the number of
    commits, the node degree and the number of neighbors that exist only in
    the blame-based graph (``author_diff``). For every author of the
    file-based graph it lists the number of commits and the node degree.
    Both parts are combined with an outer join on the author name.

    Args:
        table_format: output format of the table
        wrap_table: forwarded to ``dataframe_to_table``

    Returns:
        the rendered table

    Raises:
        TableDataEmpty: if the case study has no processed revision
    """
    case_study: CaseStudy = self.table_kwargs["case_study"]
    project_name: str = case_study.project_name
    revision = newest_processed_revision_for_case_study(
        case_study, BlameReport
    )
    if not revision:
        raise TableDataEmpty()

    blame_aig = create_blame_interaction_graph(
        project_name, revision
    ).author_interaction_graph()
    file_aig = create_file_based_interaction_graph(
        project_name, revision
    ).author_interaction_graph()

    def neighbors_of(graph: tp.Any, node: tp.Any) -> tp.Set[tp.Any]:
        """Return successors and predecessors of ``node`` in ``graph``."""
        # Querying a node that is not part of the graph raises in networkx,
        # so an author missing from one graph simply has no neighbors there.
        if not graph.has_node(node):
            return set()
        return set(graph.successors(node)).union(graph.predecessors(node))

    blame_nodes: tp.List[tp.Dict[str, tp.Any]] = []
    for node in blame_aig.nodes:
        node_attrs = tp.cast(AIGNodeAttrs, blame_aig.nodes[node])
        blame_neighbors = neighbors_of(blame_aig, node)
        file_neighbors = neighbors_of(file_aig, node)
        blame_nodes.append({
            "author": f"{node_attrs['author']}",
            "blame_num_commits": node_attrs['num_commits'],
            "blame_node_degree": blame_aig.degree(node),
            "author_diff": len(blame_neighbors.difference(file_neighbors))
        })
    # Explicit columns keep `set_index` working when a graph has no nodes.
    blame_data = pd.DataFrame(
        blame_nodes,
        columns=[
            "author", "blame_num_commits", "blame_node_degree", "author_diff"
        ]
    )
    blame_data.set_index("author", inplace=True)

    file_nodes: tp.List[tp.Dict[str, tp.Any]] = []
    for node in file_aig.nodes:
        node_attrs = tp.cast(AIGNodeAttrs, file_aig.nodes[node])
        file_nodes.append({
            "author": f"{node_attrs['author']}",
            "file_num_commits": node_attrs['num_commits'],
            "file_node_degree": file_aig.degree(node)
        })
    file_data = pd.DataFrame(
        file_nodes,
        columns=["author", "file_num_commits", "file_node_degree"]
    )
    file_data.set_index("author", inplace=True)

    degree_data = blame_data.join(file_data, how="outer")

    kwargs: tp.Dict[str, tp.Any] = {}
    if table_format.is_latex():
        kwargs["index"] = True
        kwargs["multicolumn_format"] = "c"
        kwargs["multirow"] = True

    return dataframe_to_table(
        degree_data,
        table_format,
        wrap_table,
        wrap_landscape=True,
        **kwargs
    )
def plot(self, view_mode: bool) -> None:
    """
    Draw, for every case study of the loaded paper config, the distribution
    of the relative number of interacting authors per commit.

    The value of a commit node is its degree in the commit-author
    interaction graph divided by the number of nodes with a truthy
    ``author`` attribute. Case studies without a processed revision are
    skipped; a project only shows up on the x-axis if it contributed at
    least one commit.

    Args:
        view_mode: not used by this implementation
    """
    rows: tp.List[tp.Dict[str, tp.Any]] = []
    plotted_projects: tp.List[str] = []

    for study in get_loaded_paper_config().get_all_case_studies():
        project = study.project_name
        newest_revision = newest_processed_revision_for_case_study(
            study, BlameReport
        )
        if not newest_revision:
            continue

        graph = create_blame_interaction_graph(
            project, newest_revision
        ).commit_author_interaction_graph(
            outgoing_interactions=True, incoming_interactions=True
        )
        author_count = sum(
            1 for node_id in graph.nodes if graph.nodes[node_id]["author"]
        )

        project_rows: tp.List[tp.Dict[str, tp.Any]] = []
        for node_id in graph.nodes:
            commit = tp.cast(CAIGNodeAttrs, graph.nodes[node_id])["commit"]
            if not commit:
                continue
            project_rows.append({
                "Project": project,
                "commit": commit.commit_hash,
                "# Interacting Authors": graph.degree(node_id) / author_count
            })

        if project_rows:
            plotted_projects.append(project)
            rows.extend(project_rows)

    frame = pd.DataFrame(rows)
    project_order = sorted(plotted_projects)

    ax = sns.violinplot(
        x="Project",
        y="# Interacting Authors",
        data=frame,
        order=project_order,
        inner=None,
        linewidth=1,
        color=".95"
    )
    sns.stripplot(
        x="Project",
        y="# Interacting Authors",
        data=frame,
        order=project_order,
        alpha=.25,
        size=3
    )
    ax.set_ylim(-0.1, 1.1)
    ax.set_aspect(0.3 / ax.get_data_ratio())
    ax.tick_params(axis='x', labelrotation=45)
    ax.set_xlabel(None)
def plot(self, view_mode: bool) -> None:
    """
    Plot the sorted code centrality of all commits of a case study.

    The centrality of a commit is computed as its node degree in the commit
    interaction graph minus its number of inserted lines. Nodes that belong
    to the uncommitted pseudo-commit or cannot be resolved by the commit
    lookup helper are ignored.

    Args:
        view_mode: not used by this implementation

    Raises:
        PlotDataEmpty: if the case study has no processed revision or the
                       graph contains no usable commit node
    """
    case_study = self.plot_kwargs["case_study"]

    style.use(self.plot_config.style())
    fig, axes = plt.subplots(1, 1, sharey="all")
    fig.subplots_adjust(hspace=0.5)

    fig.suptitle("Central Code")
    axes.set_title(case_study.project_name)
    axes.set_ylabel("Code Centrality")
    axes.set_xlabel("Commits")

    project_name = case_study.project_name
    revision = newest_processed_revision_for_case_study(
        case_study, BlameReport
    )
    if not revision:
        raise PlotDataEmpty()

    churn_config = ChurnConfig.create_c_style_languages_config()
    cig = create_blame_interaction_graph(
        project_name, revision
    ).commit_interaction_graph()
    commit_lookup = create_commit_lookup_helper(project_name)
    repo_lookup = get_local_project_gits(project_name)

    def filter_nodes(node: CommitRepoPair) -> bool:
        """Accept only real commits that the lookup helper can resolve."""
        if node.commit_hash == UNCOMMITTED_COMMIT_HASH:
            return False
        return bool(commit_lookup(node))

    nodes: tp.List[tp.Dict[str, tp.Any]] = []
    for node in cig.nodes:
        node_attrs = tp.cast(CIGNodeAttrs, cig.nodes[node])
        commit = node_attrs["commit"]
        if not filter_nodes(commit):
            continue
        _, insertions, _ = calc_commit_code_churn(
            Path(repo_lookup[commit.repository_name].path),
            commit.commit_hash, churn_config
        )
        if insertions == 0:
            LOG.warning(f"Churn for commit {commit} is 0.")
            insertions = 1
        nodes.append({
            "commit_hash": commit.commit_hash,
            "degree": cig.degree(node),
            "insertions": insertions,
        })

    # Without any row the frame has no columns and the arithmetic below
    # would fail with a KeyError; report the missing data explicitly.
    if not nodes:
        raise PlotDataEmpty()

    data = pd.DataFrame(nodes)
    # NOTE(review): zero churn is bumped to 1 above, which hints that a ratio
    # may have been intended here instead of a difference -- confirm.
    data["code_centrality"] = data["degree"] - data["insertions"]
    # A single sort is sufficient; only the sorted values are plotted.
    data.sort_values(by="code_centrality", inplace=True)
    axes.plot(data["code_centrality"].values)
    axes.set_ylim(bottom=0)
def plot(self, view_mode: bool) -> None:
    """
    Plot commit size against node degree for the commit interaction graph.

    Commit size is the number of insertions taken from the per-repository
    code churn. The data is passed through ``apply_tukeys_fence`` on the
    commit size before plotting, and two guide lines are drawn at the 20%
    quantile of the commit size and the 80% quantile of the node degree.

    Args:
        view_mode: not used by this implementation

    Raises:
        PlotDataEmpty: if the case study has no processed revision
    """
    study = self.plot_kwargs["case_study"]
    project = study.project_name

    newest_revision = newest_processed_revision_for_case_study(
        study, BlameReport
    )
    if not newest_revision:
        raise PlotDataEmpty()

    graph = create_blame_interaction_graph(
        project, newest_revision
    ).commit_interaction_graph()
    lookup_commit = create_commit_lookup_helper(project)
    project_repos = get_local_project_gits(project)

    churn_by_repo = {}
    for repo_name, repo in project_repos.items():
        churn_by_repo[repo_name] = calc_repo_code_churn(
            repo, ChurnConfig.create_c_style_languages_config()
        )

    def is_resolvable(pair: CommitRepoPair) -> bool:
        """Accept only real commits that the lookup helper can resolve."""
        if pair.commit_hash == UNCOMMITTED_COMMIT_HASH:
            return False
        return bool(lookup_commit(pair))

    rows: tp.List[tp.Dict[str, tp.Any]] = []
    for node_id in graph.nodes:
        pair = tp.cast(CIGNodeAttrs, graph.nodes[node_id])["commit"]
        if is_resolvable(pair):
            repo_churn = churn_by_repo[pair.repository_name]
            _, insertions, _ = repo_churn[pair.commit_hash]
            rows.append({
                "Case Study": project,
                "commit_hash": pair.commit_hash,
                "Commit Size": insertions,
                "Node Degree": graph.degree(node_id),
            })

    frame = apply_tukeys_fence(pd.DataFrame(rows), "Commit Size", 3.0)
    joint_axes = multivariate_grid(
        "Commit Size", "Node Degree", "Case Study", frame, global_kde=False
    ).ax_joint

    size_threshold = frame["Commit Size"].quantile(0.20)
    joint_axes.axvline(size_threshold, color="#777777", linewidth=3)
    degree_threshold = frame["Node Degree"].quantile(0.80)
    joint_axes.axhline(degree_threshold, color="#777777", linewidth=3)
def save(self, plot_dir: Path, filetype: str = 'svg') -> None:
    """
    Write the commit interaction graph as a dot file into ``plot_dir``.

    Every node receives a ``label`` attribute before the graph is written.
    The output file name is always requested with the ``"dot"`` type.

    Args:
        plot_dir: directory the dot file is written to
        filetype: not used by this implementation
    """
    graph = create_blame_interaction_graph(
        self.plot_kwargs["project"], self.plot_kwargs["revision"]
    ).commit_interaction_graph()

    # NOTE(review): other code reads the node attribute "commit" from this
    # graph type, while this reads "commit_hash" -- confirm the key exists.
    labels = {}
    for node_id in graph.nodes:
        labels[node_id] = graph.nodes[node_id]["commit_hash"]
    nx.set_node_attributes(graph, labels, "label")

    # pylint: disable=import-outside-toplevel
    from networkx.drawing.nx_agraph import write_dot
    target_file = plot_dir / self.plot_file_name("dot")
    write_dot(graph, target_file)
def _prepare_cig_plotly(
    project_name: str, revision: FullCommitHash,
    create_node_info: tp.Callable[[NodeTy, CommitRepoPair, nx.DiGraph],
                                  NodeInfoTy],
    create_edge_info: tp.Callable[[CommitRepoPair, CommitRepoPair, int],
                                  EdgeInfoTy]
) -> tp.Tuple[tp.List[tp.Tuple[NodeTy, NodeInfoTy]], tp.List[tp.Tuple[
    NodeTy, NodeTy, EdgeInfoTy]]]:
    """
    Prepare the nodes and edges of a commit interaction graph for plotting.

    Nodes are dropped if they belong to the uncommitted pseudo-commit, if
    their commit cannot be resolved, or if the commit is older than
    2015-01-01. Edges are kept only if both end points were kept.

    Args:
        project_name: name of the project the graph is created for
        revision: revision the blame interaction graph is created for
        create_node_info: called as ``(node, commit, graph)`` for every kept
                          node to produce its node info
        create_edge_info: called as ``(source_commit, sink_commit, amount)``
                          for every kept edge to produce its edge info

    Returns:
        a tuple of the kept nodes with their info, sorted by ascending
        commit time, and the kept edges with their info
    """
    commit_lookup = create_commit_lookup_helper(project_name)
    cig = create_blame_interaction_graph(
        project_name, revision
    ).commit_interaction_graph()

    # TODO: make filter configurable
    oldest_commit_date = datetime(2015, 1, 1)

    def kept_commit_time(pair: CommitRepoPair) -> tp.Optional[int]:
        """Return the commit time of ``pair``, or None if it is filtered."""
        if pair.commit_hash == UNCOMMITTED_COMMIT_HASH:
            return None
        commit = commit_lookup(pair)
        if not commit:
            return None
        if datetime.utcfromtimestamp(commit.commit_time) < oldest_commit_date:
            return None
        return int(commit.commit_time)

    nodes: tp.List[tp.Tuple[NodeTy, NodeInfoTy]] = []
    # Commit time of every kept node. Filled once so that neither sorting
    # nor edge filtering has to run the commit lookup again.
    commit_times: tp.Dict[NodeTy, int] = {}
    for node in cig.nodes:
        node_attrs = tp.cast(CIGNodeAttrs, cig.nodes[node])
        commit = node_attrs["commit"]
        commit_time = kept_commit_time(commit)
        if commit_time is None:
            continue
        commit_times[node] = commit_time
        nodes.append((node, create_node_info(node, commit, cig)))

    nodes.sort(key=lambda entry: commit_times[entry[0]])

    edges: tp.List[tp.Tuple[NodeTy, NodeTy, EdgeInfoTy]] = []
    for source, sink in cig.edges:
        # A node passed the filter exactly if its commit time was recorded.
        if source not in commit_times or sink not in commit_times:
            continue
        amount = tp.cast(CIGEdgeAttrs, cig[source][sink])["amount"]
        source_commit = tp.cast(CIGNodeAttrs, cig.nodes[source])["commit"]
        sink_commit = tp.cast(CIGNodeAttrs, cig.nodes[sink])["commit"]
        edges.append((
            source, sink,
            create_edge_info(source_commit, sink_commit, amount)
        ))

    return nodes, edges
def plot(self, view_mode: bool) -> None:
    """
    Plot the sorted node degrees of the blame-based author interaction graph.

    Three curves are drawn: total degree, out-degree and in-degree. Each
    curve is sorted in ascending order independently of the others.

    Args:
        view_mode: not used by this implementation

    Raises:
        PlotDataEmpty: if the case study has no processed revision
    """
    study = self.plot_kwargs["plot_case_study"]

    style.use(self.plot_config.style())
    fig, axes = plt.subplots(1, 1, sharey="all")
    fig.subplots_adjust(hspace=0.5)

    fig.suptitle("Author Interaction Graph - Node Degrees")
    axes.set_title(study.project_name)
    axes.set_ylabel("Degree")
    axes.set_xlabel("Authors")

    project = study.project_name
    newest_revision = newest_processed_revision_for_case_study(
        study, BlameReport
    )
    if not newest_revision:
        raise PlotDataEmpty()

    graph = create_blame_interaction_graph(
        project, newest_revision
    ).author_interaction_graph()

    def to_row(node_id: tp.Any) -> tp.Dict[str, tp.Any]:
        """Collect the author and all degree kinds of a single node."""
        attrs = tp.cast(AIGNodeAttrs, graph.nodes[node_id])
        return {
            "author": attrs["author"],
            "node_degree": graph.degree(node_id),
            "node_out_degree": graph.out_degree(node_id),
            "node_in_degree": graph.in_degree(node_id),
        }

    rows: tp.List[tp.Dict[str, tp.Any]] = [
        to_row(node_id) for node_id in graph.nodes
    ]
    frame = pd.DataFrame(rows)

    curves = (
        ("node_degree", "degree"),
        ("node_out_degree", "out_degree"),
        ("node_in_degree", "in_degree"),
    )
    sorted_values = [
        (frame[column].sort_values().values, label)
        for column, label in curves
    ]
    for values, label in sorted_values:
        axes.plot(values, label=label)

    axes.legend()
def test_get_author_data(self) -> None:
    """Verify the data returned for a single author of the author
    interaction graph."""
    vara_cfg()['paper_config']['current_config'] = "test_casestudy_status"
    load_paper_config()

    xz_case_study = get_paper_config().get_case_studies("xz")[0]
    revision = newest_processed_revision_for_case_study(
        xz_case_study, BlameReport
    )
    assert revision

    graph = create_blame_interaction_graph("xz", revision)
    self.assertEqual(graph.project_name, "xz")

    author_name = "Lasse Collin"
    author_data = get_author_data(
        graph.author_interaction_graph(), author_name
    )

    self.assertEqual(author_data["node_attrs"]["author"], author_name)
    self.assertEqual(author_data["neighbors"], set())
    # The author has neither incoming nor outgoing interactions.
    for attrs_key in ("in_attrs", "out_attrs"):
        self.assertEqual(0, len(author_data[attrs_key]))
def plot(self, view_mode: bool) -> None:
    """
    Plot the sorted number of interacting authors per commit.

    The number of authors of a commit is the degree of its node in the
    commit-author interaction graph. Nodes without a commit are skipped.

    Args:
        view_mode: not used by this implementation

    Raises:
        PlotDataEmpty: if the case study has no processed revision
    """
    study = self.plot_kwargs["plot_case_study"]

    style.use(self.plot_config.style())
    fig, axes = plt.subplots(1, 1, sharey="all")
    fig.subplots_adjust(hspace=0.5)

    fig.suptitle("Commit-Author Interaction Graph - # Interacting Authors")
    axes.set_title(study.project_name)
    axes.set_ylabel("Authors")
    axes.set_xlabel("Commits")

    project = study.project_name
    newest_revision = newest_processed_revision_for_case_study(
        study, BlameReport
    )
    if not newest_revision:
        raise PlotDataEmpty()

    graph = create_blame_interaction_graph(
        project, newest_revision
    ).commit_author_interaction_graph()

    rows: tp.List[tp.Dict[str, tp.Any]] = []
    for node_id in graph.nodes:
        commit = tp.cast(CAIGNodeAttrs, graph.nodes[node_id])["commit"]
        if not commit:
            continue
        rows.append({
            "commit": commit.commit_hash,
            "num_authors": graph.degree(node_id)
        })

    frame = pd.DataFrame(rows)
    author_counts = frame["num_authors"].sort_values()
    axes.plot(author_counts.values)
def test_blame_interaction_graph(self) -> None:
    """Verify node and edge counts of all graphs derived from the blame
    interaction graph."""
    vara_cfg()['paper_config']['current_config'] = "test_casestudy_status"
    load_paper_config()

    xz_case_study = get_paper_config().get_case_studies("xz")[0]
    revision = newest_processed_revision_for_case_study(
        xz_case_study, BlameReport
    )
    assert revision

    graph = create_blame_interaction_graph("xz", revision)
    self.assertEqual(graph.project_name, "xz")

    # (graph factory, expected number of nodes, expected number of edges)
    expectations = (
        (graph.commit_interaction_graph, 124, 928),
        (graph.author_interaction_graph, 1, 0),
        (graph.commit_author_interaction_graph, 125, 92),
    )
    for build_graph, expected_nodes, expected_edges in expectations:
        derived = build_graph()
        self.assertEqual(expected_nodes, len(derived.nodes))
        self.assertEqual(expected_edges, len(derived.edges))
def plot(self, view_mode: bool) -> None:
    """
    Plot the node degrees of the commit interaction graph.

    Three curves are drawn: total degree, out-degree and in-degree. With
    ``sort == "time"`` all curves follow the commit time from old to new;
    with ``sort == "degree"`` every curve is sorted by its own values.

    Args:
        view_mode: not used by this implementation

    Raises:
        PlotDataEmpty: if the case study has no processed revision
    """
    sort = self.plot_kwargs["sort"]
    study = self.plot_kwargs["plot_case_study"]

    style.use(self.plot_config.style())
    fig, axes = plt.subplots(1, 1, sharey="all")
    fig.subplots_adjust(hspace=0.5)

    fig.suptitle("Commit Interaction Graph - Node Degrees")
    axes.set_title(study.project_name)
    axes.set_ylabel("Degree")
    x_labels = {"time": "Time (old to new)", "degree": "Commits"}
    axes.set_xlabel(x_labels.get(sort, ""))

    newest_revision = newest_processed_revision_for_case_study(
        study, BlameReport
    )
    if not newest_revision:
        raise PlotDataEmpty()

    graph = create_blame_interaction_graph(
        study.project_name, newest_revision
    ).commit_interaction_graph()
    lookup_commit = create_commit_lookup_helper(study.project_name)

    def is_resolvable(pair: CommitRepoPair) -> bool:
        """Accept only real commits that the lookup helper can resolve."""
        if pair.commit_hash == UNCOMMITTED_COMMIT_HASH:
            return False
        return bool(lookup_commit(pair))

    def time_of(pair: CommitRepoPair) -> datetime:
        """Convert the commit time of ``pair`` into a datetime."""
        return datetime.utcfromtimestamp(lookup_commit(pair).commit_time)

    rows: tp.List[tp.Dict[str, tp.Any]] = []
    for node_id in graph.nodes:
        pair = tp.cast(CIGNodeAttrs, graph.nodes[node_id])["commit"]
        if is_resolvable(pair):
            rows.append({
                "commit_hash": pair.commit_hash,
                "commit_time": time_of(pair),
                "node_degree": graph.degree(node_id),
                "node_out_degree": graph.out_degree(node_id),
                "node_in_degree": graph.in_degree(node_id),
            })

    frame = pd.DataFrame(rows)
    if sort == "time":
        frame = frame.sort_values(by="commit_time")

    curves = (
        ("node_degree", "degree"),
        ("node_out_degree", "out_degree"),
        ("node_in_degree", "in_degree"),
    )
    prepared = []
    for column, label in curves:
        series = frame[column]
        if sort == "degree":
            series = series.sort_values()
        prepared.append((series.values, label))

    for values, label in prepared:
        axes.plot(values, label=label)

    axes.legend()
def plot(self, view_mode: bool) -> None:
    """
    Draw, for every case study of the loaded paper config, how many
    additional authors the blame-based author interaction graph reveals.

    For each author of the file-based graph the value is the number of
    neighbors found in the blame-based graph but not in the file-based
    graph. Authors missing from the blame-based graph have no blame
    neighbors. Case studies without a processed revision are skipped.

    Args:
        view_mode: not used by this implementation
    """
    frames: tp.List[pd.DataFrame] = []
    plotted_projects: tp.List[str] = []

    for study in get_loaded_paper_config().get_all_case_studies():
        project = study.project_name
        newest_revision = newest_processed_revision_for_case_study(
            study, BlameReport
        )
        if not newest_revision:
            continue

        plotted_projects.append(project)

        blame_aig = create_blame_interaction_graph(
            project, newest_revision
        ).author_interaction_graph()
        file_aig = create_file_based_interaction_graph(
            project, newest_revision
        ).author_interaction_graph()

        rows: tp.List[tp.Dict[str, tp.Any]] = []
        for node_id in file_aig.nodes:
            attrs = tp.cast(AIGNodeAttrs, file_aig.nodes[node_id])

            blame_neighbors = set()
            if blame_aig.has_node(node_id):
                blame_neighbors.update(blame_aig.successors(node_id))
                blame_neighbors.update(blame_aig.predecessors(node_id))

            file_neighbors = set(file_aig.successors(node_id))
            file_neighbors.update(file_aig.predecessors(node_id))

            rows.append({
                "Project": project,
                "author": f"{attrs['author']}",
                "# Additional Authors":
                    len(blame_neighbors.difference(file_neighbors))
            })

        frames.append(pd.DataFrame(rows).set_index("author"))

    combined = pd.concat(frames)
    project_order = sorted(plotted_projects)

    ax = sns.violinplot(
        x="Project",
        y="# Additional Authors",
        data=combined,
        order=project_order,
        inner=None,
        linewidth=1,
        color=".95"
    )
    sns.stripplot(
        x="Project",
        y="# Additional Authors",
        data=combined,
        order=project_order,
        alpha=.25,
        size=3
    )
    upper_limit = 1.1 * combined["# Additional Authors"].max()
    ax.set_ylim(bottom=0, top=upper_limit)
    ax.set_aspect(0.3 / ax.get_data_ratio())
    ax.tick_params(axis='x', labelrotation=45)
    ax.set_xlabel(None)
def create_graph(project_name: str, revision: FullCommitHash) -> nx.DiGraph:
    """
    Create the commit-author interaction graph of a project revision.

    Args:
        project_name: name of the project the graph is created for
        revision: revision the blame interaction graph is created for

    Returns:
        the commit-author interaction graph derived from the blame
        interaction graph
    """
    blame_graph = create_blame_interaction_graph(project_name, revision)
    return blame_graph.commit_author_interaction_graph()