def get_modules_from_db(repo_ids):
    """Select the modules that belong to the given repositories.

    Each entry of ``repo_ids`` is a repository id in which the ``@``
    separator has been written as ``_``. Only the *last* underscore is
    converted back to ``@``; a blanket ``replace("_", "@")`` would also
    rewrite underscores that are part of the repository name and yield an
    id that matches nothing in the database.

    NOTE(review): assumes the part after the separator (presumably a commit
    hash) never contains an underscore -- confirm against how ids are built.

    Args:
        repo_ids: Iterable of repository id strings using ``_`` in place
            of ``@``. Entries without any underscore are passed through
            unchanged.

    Returns:
        The peewee select query over ``Module`` joined with ``Repo``,
        grouped by ``Repo.name`` and restricted to the requested ids.
    """
    # rsplit with maxsplit=1 touches only the right-most underscore.
    db_ids = ["@".join(r.rsplit("_", 1)) for r in repo_ids]

    # ``<<`` is peewee's IN operator.
    ms: List[Module] = (
        Module.select()
        .join(Repo)
        .group_by(Repo.name)
        .where(Repo.id << db_ids)
    )
    return ms
def draw_count_test_cases():
    """Plot how many repositories fall into each test-suite-size bin.

    Loads ``Module`` rows joined with ``Repo`` and grouped by ``Repo.name``,
    bins their ``total_cases`` value and saves a seaborn count plot to
    ``good_repos_by_suite_size_bin.png``.

    Rows whose ``total_cases`` lies outside ``[1, 2000]`` get no bin (NaN
    from ``pd.cut``) and therefore do not appear in the plot.
    """
    ms: List[Module] = (
        Module.select()
        .join(Repo)
        .group_by(Repo.name)
        .order_by(Module.total_cases.desc())
    )
    df = pd.DataFrame(list(ms.dicts()))

    _, ax = plt.subplots(figsize=(10, 5))

    bin_column_name = "bin_tc"
    column_to_bin = "total_cases"
    bins = [1, 2, 6, 10, 20, 50, 100, 2000]
    bin_labels = [f"[{lo}-{hi}]" for lo, hi in zip(bins[:-1], bins[1:])]

    # pd.cut builds right-closed intervals, i.e. (1, 2], (2, 6], ...
    # Without include_lowest=True a suite of exactly one test case would
    # fall outside the first interval and silently drop out of the plot,
    # even though the first label reads "[1-2]".
    binned = pd.cut(
        df[column_to_bin],
        bins=bins,
        labels=bin_labels,
        include_lowest=True,
    )
    df = df.assign(**{bin_column_name: binned})

    # Single colour for all bars: the first colour of the active palette.
    sns.countplot(data=df, x=bin_column_name, color=sns.color_palette()[0])
    ax.set(xlabel="Suite size", ylabel="Repositories count")

    plt.tight_layout()
    plt.savefig(
        "good_repos_by_suite_size_bin.png",
        dpi=300,
        transparent=transparent_bg,
    )
def get_modules_from_db(repo_ids):
    """Select the modules whose repository id is among ``repo_ids``.

    Every incoming id is rewritten before the lookup: the character that
    sits 41 positions from the end is replaced by ``@``, leaving the final
    40 characters untouched (presumably a 40-character commit hash --
    confirm with the caller). The rewritten ids are printed to stdout.

    Args:
        repo_ids: Iterable of repository id strings.

    Returns:
        The peewee select query over ``Module`` joined with ``Repo`` and
        restricted to the rewritten ids.
    """
    db_ids = []
    for raw_id in repo_ids:
        prefix = raw_id[:-41]
        suffix = raw_id[-40:]
        db_ids.append(prefix + "@" + suffix)
    print(db_ids)

    # ``<<`` is peewee's IN operator.
    query = Module.select().join(Repo).where(Repo.id << db_ids)
    return query
def _save_labelled_box_plot(df, columns, x_label, y_label, file_name):
    """Box-plot the human-readable counterparts of ``columns`` and save the figure."""
    readable_columns = [HUMAN_READABLE_LABELS[name] for name in columns]

    _, ax = plt.subplots(figsize=(10, 5))
    sns.boxplot(data=df.loc[:, readable_columns])
    if for_presentation:
        make_box_plot_white(ax)
    ax.set(xlabel=x_label, ylabel=y_label)
    plt.tight_layout()
    plt.savefig(file_name, dpi=300, transparent=transparent_bg)


def draw_coverage_goals():
    """Save two box plots describing coverage goals.

    Loads ``Module`` rows joined with ``Repo`` and grouped by ``Repo.name``,
    adds an ``all_pairs`` column (row-wise sum of ``m_pairs``, ``im_pairs``
    and ``ic_pairs``), copies every column listed in
    ``HUMAN_READABLE_LABELS`` under its readable name and then writes:

    * ``coverage_goals_all_unique.png`` -- statements, branches, all pairs;
    * ``pairs_detailed_all_unique.png`` -- the three pair kinds separately.
    """
    query = (
        Module.select()
        .join(Repo)
        .group_by(Repo.name)
        .order_by(Module.total_cases.desc())
    )
    df = pd.DataFrame(list(query.dicts()))

    pair_columns = ["m_pairs", "im_pairs", "ic_pairs"]
    df["all_pairs"] = df[pair_columns].sum(axis=1)

    # Copy each column under its readable label, then blank the source
    # column so only the readable one carries the values.
    for col, readable_col in HUMAN_READABLE_LABELS.items():
        df[readable_col] = df[col]
        df[col] = None

    # coverage goals
    _save_labelled_box_plot(
        df,
        ["statements", "branches", "all_pairs"],
        x_label="Goal types",
        y_label="Goals count",
        file_name="coverage_goals_all_unique.png",
    )

    # pair detailed
    _save_labelled_box_plot(
        df,
        pair_columns,
        x_label="Definition-use pair types",
        y_label="Pairs count",
        file_name="pairs_detailed_all_unique.png",
    )
pragmas={ "journal_mode": "wal", "cache_size": -64 * 1000, "foreign_key": 1, "ignore_check_constraints": 9, "synchronous": 0, } ) DATABASE_PROXY.initialize(db) ms: List[Module] = Module.select() \ .where(Module.st_cov > 70) \ .where(Module.du_cov > 70) \ .where(Module.br_cov > 70) \ .where(Module.bugs < 8) \ .where(Module.statements > 50) \ .where(Module.total_cases > 20) \ .where(Module.total_cases < 1000) \ .where(Module.is_full_cfg == 1) \ .group_by(Module.path) \ .order_by(Module.total_cases.desc()) \ # .limit(10) print(len(list(ms))) # exit() print("\n".join(list( map(lambda t: " ".join(t), zip( map(str, map(operator.attrgetter("path"), ms)), map(str, map(operator.attrgetter("path"), ms)), map(str, map(operator.attrgetter("total_cases"), ms)), map(str, map(operator.attrgetter("statements"), ms))
database = "../selection.db" db = SqliteExtDatabase(database, pragmas={ "journal_mode": "wal", "cache_size": -64 * 1000, "foreign_key": 1, "ignore_check_constraints": 9, "synchronous": 0, }) DATABASE_PROXY.initialize(db) # .where(Module.total_cases < 100) \ # .where(Module.bugs < 8) \ ms: List[Module] = Module.select() \ .join(Repo) \ .group_by(Repo.name) \ .order_by(Module.total_cases.desc()) \ .offset(off) \ .limit(lim) ms = list(ms) repos = [] for m in ms: repo_id = m.repo.id repo_id = repo_id.replace("@", "_") df = read_combined_df(data_path, repo_id, DataFrameType.FIXED_SIZE) if df is not None: # print(df) metrics = df[METRIC].unique() if len(metrics) < 3: continue by_metric = df.groupby(METRIC)[SUITE_COVERAGE].agg(["min", "max"])