def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
    """
    Render the pairwise Pearson correlations of the blame-diff metrics for
    every case study of the paper config.

    Args:
        table_format: output format of the generated table
        wrap_table: forwarded unchanged to ``dataframe_to_table``

    Returns:
        the table produced by ``dataframe_to_table``
    """
    metrics = [
        "churn", "num_interactions", "num_interacting_commits",
        "num_interacting_authors"
    ]
    case_studies = get_paper_config().get_all_case_studies()

    # Load the data of all case studies before any frame is modified.
    frames = []
    for case_study in case_studies:
        project_name = case_study.project_name
        frames.append(
            BlameDiffMetricsDatabase.get_data_for_project(
                project_name, ["revision", *metrics],
                get_commit_map(project_name), case_study
            )
        )

    correlations = []
    for frame in frames:
        frame.set_index('revision', inplace=True)
        # Revisions without any churn do not take part in the correlation.
        zero_churn_rows = frame[frame['churn'] == 0].index
        frame.drop(zero_churn_rows, inplace=True)
        correlations.append(frame[metrics].corr(method="pearson"))

    # One correlation matrix per case study, placed side by side.
    df = pd.concat(
        correlations, axis=1, keys=get_unique_cs_name(case_studies)
    )

    table_options: tp.Dict[str, tp.Any] = {"bold_rows": True}
    if table_format.is_latex():
        table_options["multicolumn_format"] = "c"

    return dataframe_to_table(
        df, table_format, wrap_table, wrap_landscape=False, **table_options
    )
def plot(self, view_mode: bool) -> None:
    """
    Draw a multivariate grid of two blame-diff variables across the selected
    case studies.

    Both variables are min-max normalized per case study and the points are
    colored by project name.

    Args:
        view_mode: not used by this implementation
    """

    def normalize(values: pd.Series) -> pd.Series:
        # Min-max scaling; a constant series divides by zero here.
        lowest = values.min()
        highest = values.max()
        return tp.cast(pd.Series, (values - lowest) / (highest - lowest))

    case_studies: tp.List[CaseStudy] = self.plot_kwargs["case_study"]
    var_x = self.plot_kwargs["var_x"].value
    var_y = self.plot_kwargs["var_y"].value

    # Load everything first, then normalize each frame in place.
    loaded_frames = [
        BlameDiffMetricsDatabase.get_data_for_project(
            case_study.project_name, ["revision", var_x, var_y],
            get_commit_map(case_study.project_name), case_study
        ) for case_study in case_studies
    ]

    dataframes = []
    for case_study, frame in zip(case_studies, loaded_frames):
        for column in (var_x, var_y):
            frame[column] = normalize(frame[column])
        frame["project"] = case_study.project_name
        dataframes.append(frame)

    sns.set(style="ticks", color_codes=True)

    df = pd.concat(dataframes)
    df.set_index('revision', inplace=True)

    # Only filter on churn when it is one of the plotted variables.
    if "churn" in df:
        zero_churn_rows = df[df.churn == 0].index
        df.drop(zero_churn_rows, inplace=True)

    multivariate_grid(x_col=var_x, y_col=var_y, hue='project', data=df)
def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
    """
    Render the SZZ quality scores of one project as a table indexed by
    fixing and introducing commit.

    Args:
        table_format: output format of the generated table
        wrap_table: forwarded unchanged to ``dataframe_to_table``

    Returns:
        the table produced by ``dataframe_to_table``

    Raises:
        ValueError: if the configured SZZ tool is neither PyDriller SZZ nor
                    SZZUnleashed
    """
    project_name = self.table_kwargs["case_study"].project_name
    szz_tool: SZZTool = self.table_kwargs["szz_tool"]
    commit_map = get_commit_map(project_name)

    # Maps database column names to the names used in the table.
    renamed_columns = {
        "revision": "fix",
        "introducer": "introducer",
        "score": "score"
    }

    # Pick the database that belongs to the selected SZZ tool.
    if szz_tool == SZZTool.PYDRILLER_SZZ:
        database = PyDrillerSZZQualityMetricsDatabase
    elif szz_tool == SZZTool.SZZ_UNLEASHED:
        database = SZZUnleashedQualityMetricsDatabase
    else:
        raise ValueError(f"Unknown SZZ tool '{szz_tool.tool_name}'")

    data = database.get_data_for_project(
        project_name, list(renamed_columns), commit_map
    )

    data.rename(columns=renamed_columns, inplace=True)
    data.set_index(["fix", "introducer"], inplace=True)
    # Sort by score first, then by the "fix" index level only.
    data.sort_values("score", inplace=True)
    data.sort_index(level="fix", sort_remaining=False, inplace=True)

    table_options: tp.Dict[str, tp.Any] = {}
    if table_format.is_latex():
        table_options["multicolumn_format"] = "c"
        table_options["longtable"] = True

    return dataframe_to_table(
        data, table_format, wrap_table, wrap_landscape=True, **table_options
    )
def revisions_of_project(self) -> None:
    """
    Switch to the revision-selection form and, if the selected project
    changed, rebuild the revision table for it.
    """
    self.strategie_forms.setCurrentIndex(
        GenerationStrategie.SELECT_REVISION.value
    )

    # The revision list already shows the selected project.
    if self.selected_project == self.revision_list_project:
        return

    project_name = self.selected_project

    self.revision_details.setText("Loading Revisions")
    self.revision_details.repaint()

    # Fetch from the first remote before listing revisions.
    get_local_project_git(project_name).remotes[0].fetch()

    git_path = get_local_project_git_path(project_name)
    initial_commit = get_initial_commit(git_path).hash
    commit_hashes = get_all_revisions_between(
        initial_commit, 'HEAD', FullCommitHash, git_path
    )

    commit_lookup_helper = create_commit_lookup_helper(project_name)
    project = get_project_cls_by_name(project_name)
    repo_name = get_primary_project_source(project_name).local
    cmap = get_commit_map(project_name)

    looked_up_commits = [
        commit_lookup_helper(CommitRepoPair(commit_hash, repo_name))
        for commit_hash in commit_hashes
    ]
    commit_model = CommitTableModel(looked_up_commits, cmap, project)

    self.proxy_model.setSourceModel(commit_model)
    self.revision_list_project = project_name
    self.revision_details.clear()
    self.revision_details.update()
def plot(self, view_mode: bool) -> None:
    """
    Draw the commit interaction graph of one revision as a chord plot.

    Args:
        view_mode: if set, show the figure; otherwise write it to the plot's
                   HTML file
    """

    def create_node_data(
        node: NodeTy, commit: CommitRepoPair, cig: nx.DiGraph
    ) -> ChordPlotNodeInfo:
        # Only the commit is needed to describe a node.
        del node
        del cig
        return {"info": commit.commit_hash.short_hash, "color": 1}

    def create_edge_data(
        source_commit: CommitRepoPair, sink_commit: CommitRepoPair,
        amount: int
    ) -> ChordPlotEdgeInfo:
        edge_label = (
            f"{source_commit.commit_hash.short_hash} "
            f"--{{{amount}}}--> "
            f"{sink_commit.commit_hash.short_hash}"
        )
        return {"size": amount, "color": 1, "info": edge_label}

    project_name: str = self.plot_kwargs["case_study"].project_name
    short_revision = ShortCommitHash(self.plot_kwargs["revision"])
    revision = get_commit_map(project_name).convert_to_full_or_warn(
        short_revision
    )

    nodes, edges = _prepare_cig_plotly(
        project_name, revision, create_node_data, create_edge_data
    )
    figure = make_chord_plot(nodes, edges, "Commit Interaction Graph")

    if view_mode:
        figure.show()
        return

    offply.plot(figure, filename=self.plot_file_name("html"))
def plot(self, view_mode: bool) -> None:
    """
    Plot Gini coefficients of blame interactions and churn over the project
    lifetime, with the interaction code churn drawn below.

    Args:
        view_mode: not used by this implementation

    Raises:
        PlotDataEmpty: if no data is left after filtering non-code changes
    """
    style.use(self.plot_config.style())

    case_study: CaseStudy = self.plot_kwargs["case_study"]
    project_name = case_study.project_name
    commit_map: CommitMap = get_commit_map(project_name)

    requested_columns = [
        "revision", "time_id", "IN_HEAD_Interactions",
        "OUT_HEAD_Interactions", "HEAD_Interactions"
    ]
    data = BlameInteractionDatabase.get_data_for_project(
        project_name, requested_columns, commit_map, case_study
    )
    data = filter_non_code_changes(data, project_name)
    if data.empty:
        raise PlotDataEmpty
    data.sort_values(by=['time_id'], inplace=True)

    # Upper two thirds: Gini curves; lower third: churn, sharing the x axis.
    fig = plt.figure()
    fig.subplots_adjust(top=0.95, hspace=0.05, right=0.95, left=0.07)
    grid_spec = fig.add_gridspec(3, 1)

    main_axis = fig.add_subplot(grid_spec[:-1, :])
    main_axis.set_title("Gini coefficient over the project lifetime")
    main_axis.get_xaxis().set_visible(False)

    churn_axis = fig.add_subplot(grid_spec[2, :], sharex=main_axis)

    unique_rev_strs: tp.List[str] = [rev.hash for rev in data['revision']]

    # NOTE(review): the meaning of the two boolean flags is defined by the
    # draw helpers and is not visible here — confirm against their signature.
    flag_combinations = ((True, True), (True, False), (False, True))

    for first_flag, second_flag in flag_combinations:
        draw_gini_blame_over_time(
            main_axis, data, unique_rev_strs, first_flag, second_flag,
            self.plot_config.line_width()
        )

    for first_flag, second_flag in flag_combinations:
        draw_gini_churn_over_time(
            main_axis, data, unique_rev_strs, project_name, commit_map,
            first_flag, second_flag, self.plot_config.line_width()
        )

    main_axis.legend()
    main_axis.set_ylim((0., 1.))

    draw_interaction_code_churn(churn_axis, data, project_name, commit_map)

    # Adapt axis to draw nicer plots
    for x_label in churn_axis.get_xticklabels():
        x_label.set_fontsize(self.plot_config.x_tick_size())
        x_label.set_rotation(270)
        x_label.set_fontfamily('monospace')
def _gen_overview_data(tag_blocked: bool,
                       **kwargs: tp.Any) -> tp.Dict[str, tp.List[int]]:
    """
    Collect, per file status, the commit positions of a case study.

    Args:
        tag_blocked: forwarded to ``FileStatusDatabase.get_data_for_project``
        **kwargs: must contain ``case_study``; may contain ``report_type``,
                  which defaults to ``EmptyReport``

    Returns:
        a mapping from status name to a list of positions
    """
    case_study: CaseStudy = kwargs["case_study"]
    project_name = case_study.project_name
    commit_map: CommitMap = get_commit_map(project_name)
    project = get_project_cls_by_name(project_name)

    result_file_type: tp.Type[BaseReport] = kwargs.get(
        "report_type", EmptyReport
    )

    positions: tp.Dict[str, tp.List[int]] = {
        "background": [],
        "blocked": [],
        "blocked_all": [],
        "compile_error": [],
        "failed": [],
        "missing": [],
        "success": []
    }

    # Commits outside the case study form the background; blocked ones among
    # them are additionally recorded in "blocked_all".
    for c_hash, index in commit_map.mapping_items():
        if case_study.has_revision(ShortCommitHash(c_hash)):
            continue
        positions["background"].append(index)
        if hasattr(project, "is_blocked_revision"
                  ) and project.is_blocked_revision(c_hash)[0]:
            positions["blocked_all"].append(index)

    revisions = FileStatusDatabase.get_data_for_project(
        project_name, ["revision", "time_id", "file_status"],
        commit_map,
        case_study,
        result_file_type=result_file_type,
        tag_blocked=tag_blocked
    )

    def time_ids_with(status: FileStatusExtension) -> tp.List[int]:
        # Time ids of all case-study revisions that carry the given status.
        selected = revisions[revisions["file_status"] ==
                             status.get_status_extension()]
        return selected["time_id"].tolist()

    positions["success"] = time_ids_with(FileStatusExtension.SUCCESS)
    positions["failed"] = time_ids_with(FileStatusExtension.FAILED)
    positions["blocked"] = time_ids_with(FileStatusExtension.BLOCKED)
    # extend() copies the elements, so "blocked" stays a separate list.
    positions["blocked_all"].extend(positions["blocked"])
    positions["missing"] = time_ids_with(FileStatusExtension.MISSING)
    positions["compile_error"] = time_ids_with(
        FileStatusExtension.COMPILE_ERROR
    )

    return positions
def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
    """
    Render an overview table with one row per case study of the loaded paper
    config.

    Each row lists the project's domain, lines of code, commit count, and
    author count, measured at the case study's newest revision. If a case
    study has no revisions, the metrics are measured at ``HEAD`` and the
    "Revision" column is left out for that row.

    Args:
        table_format: output format of the generated table
        wrap_table: forwarded unchanged to ``dataframe_to_table``

    Returns:
        the table produced by ``dataframe_to_table``
    """
    case_studies = get_loaded_paper_config().get_all_case_studies()

    cs_data: tp.List[pd.DataFrame] = []
    for case_study in case_studies:
        project_name = case_study.project_name
        commit_map = get_commit_map(project_name)
        project_cls = get_project_cls_by_name(project_name)
        project_repo = get_local_project_git(project_name)
        # NOTE(review): presumably strips a trailing ".git/" from the
        # repository path to get the work tree — confirm.
        project_path = project_repo.path[:-5]
        project_git = git["-C", project_path]

        # Newest revision by commit-map time id. ``default=None`` keeps the
        # "HEAD" fallback below reachable for a case study without
        # revisions; indexing a sorted list raised IndexError instead.
        revision = max(
            case_study.revisions, key=commit_map.time_id, default=None
        )
        rev_range = revision.hash if revision else "HEAD"

        # Capitalize only the first character; slicing also tolerates an
        # empty domain string.
        domain = str(project_cls.DOMAIN)

        cs_dict = {
            project_name: {
                "Domain":
                    domain[:1].upper() + domain[1:],
                "LOC":
                    calc_repo_loc(project_repo, rev_range),
                "Commits":
                    int(project_git("rev-list", "--count", rev_range)),
                "Authors":
                    len(
                        project_git("shortlog", "-s",
                                    rev_range).splitlines()
                    )
            }
        }
        if revision:
            cs_dict[project_name]["Revision"] = revision.short_hash

        cs_data.append(pd.DataFrame.from_dict(cs_dict, orient="index"))

    df = pd.concat(cs_data).sort_index()

    kwargs: tp.Dict[str, tp.Any] = {"bold_rows": True}
    if table_format.is_latex():
        kwargs["multicolumn_format"] = "c"
        kwargs["multirow"] = True

    return dataframe_to_table(
        df, table_format, wrap_table, wrap_landscape=True, **kwargs
    )
def plot(self, view_mode: bool) -> None:
    """
    Plot the code churn of the case study's project.

    Args:
        view_mode: not used by this implementation
    """

    def accept_every_revision(x: tp.Any) -> bool:
        return True

    style.use(self.plot_config.style())

    case_study: CaseStudy = self.plot_kwargs['case_study']
    project_name: str = case_study.project_name
    commit_map: CommitMap = get_commit_map(project_name)

    _, axis = plt.subplots()

    # Restrict the churn to the case study's revisions when one is given.
    revision_filter = (
        case_study.has_revision if case_study else accept_every_revision
    )
    draw_code_churn(axis, project_name, commit_map, revision_filter)

    for tick_label in axis.get_xticklabels():
        tick_label.set_fontsize(self.plot_config.x_tick_size())
        tick_label.set_rotation(270)
        tick_label.set_fontfamily('monospace')
def plot(self, view_mode: bool) -> None:
    """
    Draw a one-row event plot that marks every commit position by its file
    status.

    Args:
        view_mode: not used by this implementation
    """
    style.use(self.plot_config.style())

    data = _gen_overview_data(
        self.plot_kwargs["show_blocked"], **self.plot_kwargs
    )

    fig_width = 4
    dot_to_inch = 0.01389
    width_factor = 0.75

    _, axis = plt.subplots(1, 1, figsize=(fig_width, 1))

    commit_map: CommitMap = get_commit_map(
        self.plot_kwargs["case_study"].project_name
    )
    # Scale the event lines with the number of commits in the commit map.
    width_per_commit = fig_width / len(commit_map.mapping_items())
    event_line_width = width_per_commit / dot_to_inch * width_factor

    status_colors = (
        ("background", BACKGROUND_COLOR),
        ("success", SUCCESS_COLOR),
        ("failed", FAILED_COLOR),
        ("missing", MISSING_COLOR),
        ("compile_error", COMPILE_ERROR_COLOR),
    )
    for status, color in status_colors:
        axis.eventplot(
            data[status], linewidths=event_line_width, colors=color
        )

    # Blocked revisions are drawn last, either all of them or only those
    # reported for the case study.
    if self.plot_kwargs["show_all_blocked"]:
        blocked_key = "blocked_all"
    else:
        blocked_key = "blocked"
    axis.eventplot(
        data[blocked_key], linewidths=event_line_width, colors=BLOCKED_COLOR
    )

    axis.set_axis_off()
def newest_processed_revision_for_case_study(
    case_study: CaseStudy, result_file_type: tp.Type[BaseReport]
) -> tp.Optional[FullCommitHash]:
    """
    Find the processed revision of a case study with the largest commit-map
    time id.

    Args:
        case_study: case study whose processed revisions are inspected
        result_file_type: report type of the result files

    Returns:
        the newest processed revision, or ``None`` if there is none
    """
    candidates = processed_revisions_for_case_study(
        case_study, result_file_type
    )
    if candidates:
        time_id_of = get_commit_map(case_study.project_name).time_id
        # Newest first, so the wanted revision ends up at the front.
        candidates.sort(key=time_id_of, reverse=True)
        return candidates[0]

    return None
def setUp(cls) -> None:
    """Prepare the fixtures: projects, blame report paths, a case study, and
    a commit map."""
    initialize_projects()

    # Report files relative to the test input directory.
    relative_report_paths = (
        "results/xz/BRE-BR-xz-xz-2f0bc9cd40"
        "_9e238675-ee7c-4325-8e9f-8ccf6fd3f05c_success.yaml",
        "results/xz/BRE-BR-xz-xz-c5c7ceb08a"
        "_77a6c5bc-e5c7-4532-8814-70dbcc6b5dda_success.yaml",
        "results/xz/BRE-BR-xz-xz-ef364d3abc"
        "_feeeecb2-1826-49e5-a188-d4d883f06d00_success.yaml",
        "results/TwoLibsOneProjectInteractionDiscreteLibsSingleProject/"
        "BRE-BR-TwoLibsOneProjectInteractionDiscreteLibsSingleProject-"
        "elementalist-5e8fe1616d_11ca651c-2d41-42bd-aa4e-8c37ba67b75f"
        "_success.yaml",
        "results/TwoLibsOneProjectInteractionDiscreteLibsSingleProject/"
        "BRE-BR-TwoLibsOneProjectInteractionDiscreteLibsSingleProject-"
        "elementalist-e64923e69e_0b22c10c-4adb-4885-b3d2-416749b53aa8"
        "_success.yaml",
    )
    cls.br_paths_list = [
        TEST_INPUTS_DIR / Path(relative_path)
        for relative_path in relative_report_paths
    ]

    case_study_file = TEST_INPUTS_DIR / Path(
        "paper_configs/test_blame_diff_metrics_database/"
        "TwoLibsOneProjectInteractionDiscreteLibsSingleProject_0."
        "case_study"
    )
    cls.case_study = load_case_study_from_file(case_study_file)

    cls.commit_map = get_commit_map(
        "TwoLibsOneProjectInteractionDiscreteLibsSingleProject"
    )
def calc_missing_revisions(
    self, boundary_gradient: float
) -> tp.Set[FullCommitHash]:
    """
    Determine revisions to add wherever the file status changes between two
    consecutive revisions of the case study.

    Args:
        boundary_gradient: not used by this implementation

    Returns:
        the revisions reported by ``find_missing_revisions``
    """
    case_study: CaseStudy = self.plot_kwargs["case_study"]
    project_name: str = case_study.project_name
    commit_map: CommitMap = get_commit_map(project_name)

    def get_commit_hash(row: tp.Any) -> ShortCommitHash:
        return ShortCommitHash(str(row["revision"]))

    def head_cm_neighbours(
        lhs_cm: ShortCommitHash, rhs_cm: ShortCommitHash
    ) -> bool:
        # Neighbours have directly consecutive time ids.
        lhs_time_id = commit_map.short_time_id(lhs_cm)
        return lhs_time_id + 1 == commit_map.short_time_id(rhs_cm)

    def should_insert_revision(last_row: tp.Any,
                               row: tp.Any) -> tp.Tuple[bool, float]:
        # Insert whenever the status differs; the second value is fixed.
        status_changed = last_row["file_status"] != row["file_status"]
        return status_changed, 1.0

    result_file_type: tp.Type[BaseReport] = self.plot_kwargs.get(
        "report_type", EmptyReport
    )
    revision_df = FileStatusDatabase.get_data_for_project(
        project_name, ["revision", "time_id", "file_status"],
        commit_map,
        case_study,
        result_file_type=result_file_type,
        tag_blocked=True
    )
    revision_df.sort_values(by=['time_id'], inplace=True)

    return find_missing_revisions(
        revision_df.iterrows(), get_local_project_git_path(project_name),
        commit_map, should_insert_revision, get_commit_hash,
        head_cm_neighbours
    )
def plot(self, view_mode: bool) -> None:
    """
    Draw a pair grid correlating the blame-diff metrics of one case study.

    Args:
        view_mode: not used by this implementation

    Raises:
        PlotDataEmpty: if fewer than two revisions with churn remain
    """
    case_study: CaseStudy = self.plot_kwargs["case_study"]
    project_name: str = case_study.project_name
    commit_map: CommitMap = get_commit_map(project_name)

    sns.set(style="ticks", color_codes=True)

    variables = [
        "churn", "num_interactions", "num_interacting_commits",
        "num_interacting_authors"
    ]
    df = BlameDiffMetricsDatabase.get_data_for_project(
        project_name, ["revision", "time_id", *variables], commit_map,
        case_study
    )
    df.set_index('revision', inplace=True)

    # Revisions without churn are left out of the plot.
    zero_churn_rows = df[df.churn == 0].index
    df.drop(zero_churn_rows, inplace=True)

    if df.empty or len(df.index) < 2:
        raise PlotDataEmpty
    df.sort_values(by=['time_id'], inplace=True)

    if LOG.isEnabledFor(logging.INFO):
        # Every ordered pair of two different variables.
        variable_pairs = [(x_var, y_var)
                          for x_var in variables
                          for y_var in variables
                          if x_var != y_var]
        for x_var, y_var in variable_pairs:
            log_interesting_revisions(x_var, y_var, df.copy())

    grid = sns.PairGrid(df, vars=variables)
    grid.map_diag(_hist)
    grid.map_offdiag(logit_scatterplot)
    grid.map_offdiag(annotate_correlation)

    # Leave room for the figure title above the grid.
    plt.subplots_adjust(top=0.9)
    default_title = f"Correlation matrix - Project {project_name}"
    grid.fig.suptitle(self.plot_config.fig_title(default_title))
def _get_named_df_for_case_study(
    case_study: CaseStudy, opt_level: OptLevel, plot_kwargs: tp.Dict[str,
                                                                     tp.Any]
) -> tp.Optional[tp.Dict[str, tp.Union[str, pd.DataFrame]]]:
    """
    Load the blame verifier data of one case study for one optimization
    level.

    Args:
        case_study: case study to load the data for
        opt_level: optimization level the data is filtered by
        plot_kwargs: plot arguments; ``plot_kwargs["case_study"]`` is
                     presumably the collection of all plotted case studies

    Returns:
        a dict with the project name and its data frame, or ``None`` if
        there is no data and more than one case study is plotted

    Raises:
        PlotDataEmpty: if there is no data and at most one case study is
                       plotted
    """
    project_name = case_study.project_name
    commit_map = get_commit_map(project_name)

    requested_columns = [
        "revision", "time_id", "opt_level", "total", "successful", "failed",
        "undetermined"
    ]
    verifier_plot_df = BlameVerifierReportDatabase.get_data_for_project(
        project_name, requested_columns, commit_map, case_study
    )

    # Keep only the rows of the requested optimization level.
    matches_opt_level = verifier_plot_df['opt_level'] == opt_level.value
    verifier_plot_df = verifier_plot_df.loc[matches_opt_level]

    has_no_data = verifier_plot_df.empty or len(
        verifier_plot_df['revision'].unique()
    ) == 0

    if not has_no_data:
        named_verifier_df: tp.Dict[str, tp.Union[str, pd.DataFrame]] = {
            "project_name": project_name,
            "dataframe": verifier_plot_df
        }
        return named_verifier_df

    # With several entries under "case_study" an empty one yields None
    # instead of an error.
    if len(plot_kwargs["case_study"]) > 1:
        return None

    LOG.warning(
        f"No data found for project {project_name} with optimization level "
        f"{opt_level.value}"
    )
    raise PlotDataEmpty
def _load_dataframe_for_report(
    project_name: str, cache_id: str, columns: tp.List[str],
    commit_map: CommitMap, szz_report: SZZReport
) -> pd.DataFrame:
    """
    Build the cached table of SZZ quality scores for all fix/introducer
    pairs of an SZZ report.

    A pair is only included if a report path is available for both the
    fixing and the introducing commit.

    Args:
        project_name: name of the project the reports belong to
        cache_id: id passed to ``build_cached_report_table``
        columns: columns of the empty data frame layout
        commit_map: commit map used to resolve report head commits
        szz_report: SZZ report providing the bugs to score

    Returns:
        the data frame produced by ``build_cached_report_table``
    """
    # The caller's commit map is used as passed. It used to be overwritten
    # by a fresh get_commit_map() call, which ignored the argument.
    commit_lookup = create_commit_lookup_helper(project_name)
    prj_src = get_primary_project_source(project_name)

    def create_dataframe_layout() -> pd.DataFrame:
        return pd.DataFrame(columns=columns)

    def lookup_head_commit(report: tp.Any) -> tp.Any:
        # Resolve the report's head commit to a commit of the primary source.
        return commit_lookup(
            CommitRepoPair(
                commit_map.convert_to_full_or_warn(report.head_commit),
                prj_src.local
            )
        )

    def interacting_commits(report: tp.Any, commit: tp.Any) -> tp.Any:
        # Incoming and outgoing interacting commits of the given commit.
        return get_interacting_commits_for_commit(
            report,
            CommitRepoPair(
                FullCommitHash.from_pygit_commit(commit), prj_src.local
            )
        )

    def create_data_frame_for_report(
        report_paths: tp.Tuple[Path, Path]
    ) -> tp.Tuple[pd.DataFrame, str, str]:
        # report_paths holds the fix report first, the introducer second.
        fix_report = load_blame_report(report_paths[0])
        intro_report = load_blame_report(report_paths[1])

        fix_commit = lookup_head_commit(fix_report)
        intro_commit = lookup_head_commit(intro_report)

        fix_in, fix_out = interacting_commits(fix_report, fix_commit)
        intro_in, intro_out = interacting_commits(intro_report, intro_commit)

        score = _calculate_szz_quality_score(
            fix_in, fix_out, intro_in, intro_out
        )

        return (
            pd.DataFrame({
                'revision': str(fix_report.head_commit),
                'time_id': commit_map.short_time_id(fix_report.head_commit),
                'introducer': str(intro_report.head_commit),
                'score': score
            },
                         index=[0]), id_from_paths(report_paths),
            timestamp_from_paths(report_paths)
        )

    report_map = _get_requested_report_paths(project_name, szz_report)
    available_revisions = report_map.keys()

    new_entries: tp.List[tp.Tuple[Path, Path]] = []
    # Nothing is ever scheduled for removal here.
    remove_entries: tp.List[tp.Tuple[Path, Path]] = []

    for bug in szz_report.get_all_raw_bugs():
        fix = bug.fixing_commit.to_short_commit_hash()
        if fix not in available_revisions:
            continue
        for introducer in bug.introducing_commits:
            intro = introducer.to_short_commit_hash()
            if intro in available_revisions:
                new_entries.append((report_map[fix], report_map[intro]))

    data_frame = build_cached_report_table(
        cache_id, project_name, new_entries, remove_entries,
        create_dataframe_layout, create_data_frame_for_report,
        id_from_paths, timestamp_from_paths, compare_timestamps
    )

    return data_frame
def plot(self, view_mode: bool) -> None:
    """
    Plot Lorenz curves for incoming (left) and outgoing (right) commit
    interactions, each with the interaction code churn drawn below.

    Args:
        view_mode: not used by this implementation

    Raises:
        PlotDataEmpty: if no data is left after filtering non-code changes
    """
    style.use(self.plot_config.style())

    case_study: CaseStudy = self.plot_kwargs['case_study']
    project_name: str = case_study.project_name
    commit_map = get_commit_map(project_name)

    # 3x2 grid: Lorenz curves in the upper two rows, churn in the last row.
    fig = plt.figure()
    fig.subplots_adjust(top=0.95, hspace=0.05, right=0.95, left=0.07)
    grid_spec = fig.add_gridspec(3, 2)

    main_axis = fig.add_subplot(grid_spec[:-1, :1])
    main_axis.set_title("Lorenz curve for incoming commit interactions")
    main_axis.get_xaxis().set_visible(False)

    main_axis_r = fig.add_subplot(grid_spec[:-1, -1])
    main_axis_r.set_title("Lorenz curve for outgoing commit interactions")
    main_axis_r.get_xaxis().set_visible(False)

    churn_axis = fig.add_subplot(grid_spec[2, :1], sharex=main_axis)
    churn_axis_r = fig.add_subplot(grid_spec[2, -1], sharex=main_axis_r)

    requested_columns = [
        "revision", "time_id", "IN_HEAD_Interactions",
        "OUT_HEAD_Interactions", "HEAD_Interactions"
    ]
    data = BlameInteractionDatabase.get_data_for_project(
        project_name, requested_columns, commit_map, case_study
    )
    data = filter_non_code_changes(data, project_name)
    if data.empty:
        raise PlotDataEmpty

    unique_rev_strs: tp.List[str] = [rev.hash for rev in data['revision']]

    # Left panel first, then right panel.
    # NOTE(review): the two flags presumably select incoming and outgoing
    # interactions — confirm against draw_interaction_lorenz_curve.
    panels = (
        (main_axis, churn_axis, True, False),
        (main_axis_r, churn_axis_r, False, True),
    )
    for lorenz_axis, panel_churn_axis, first_flag, second_flag in panels:
        draw_interaction_lorenz_curve(
            lorenz_axis, data, unique_rev_strs, first_flag, second_flag,
            self.plot_config.line_width()
        )
        draw_perfect_lorenz_curve(
            lorenz_axis, unique_rev_strs, self.plot_config.line_width()
        )
        draw_interaction_code_churn(
            panel_churn_axis, data, project_name, commit_map
        )

    # Adapt axis to draw nicer plots
    for labelled_axis in (churn_axis, churn_axis_r):
        for x_label in labelled_axis.get_xticklabels():
            x_label.set_fontsize(self.plot_config.x_tick_size())
            x_label.set_rotation(270)
            x_label.set_fontfamily('monospace')