def data_summary(df, benchmarks, out_file, caption_prefix=None):
    """Summarize, per benchmark, the number of instances for which each base feature was successfully computed."""
    if not caption_prefix:
        caption_prefix = ""
    df = df[df.benchmark.isin(benchmarks)].copy()  # copy to avoid SettingWithCopyWarning below
    df['SAT'] = df['result']
    df.loc[df.SAT != "SAT", 'SAT'] = np.nan
    g = df.groupby("benchmark")
    res = g.aggregate('count')
    index = res.index
    # Append a "Total" row (note: DataFrame.append was removed in pandas 2.0; use pd.concat there).
    res = res.append(res.sum(numeric_only=True), ignore_index=True)
    res.index = list(index) + ["Total"]
    # Use simp_num_vars to count instances, since MapleCOMSPS contains some
    # instances that get simplified away.
    out = res[["simp_num_vars", "simp_lsr_size", "simp_weak_size", "simp_q",
               "simp_backbones", "simp_tw_upper"]]
    out.columns = ["Instances", "LSR", "Weak", "Cmty", "Bones", "TW"]
    with open(out_file, 'w') as o:
        latex_gen.insert_table(
            o, out.to_latex(), tabular=True, precomputed=True, tiny=False,
            caption=caption_prefix +
                    " The number of instances for which we were able to"
                    " successfully compute each parameter. "
                    "``Cmty'' refers to the community parameters; "
                    "``TW'' denotes the treewidth upper bound; "
                    "``Bones'' denotes backbone size. ")
    print(out.to_latex())
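
# Example usage (sketch): the CSV path, benchmark names, and output file below are
# illustrative assumptions, not part of this project; the real driver elsewhere
# supplies them. `df` is assumed to be a pandas DataFrame with a `benchmark`
# column, a `result` column, and the simp_* feature columns referenced above
# (this module is assumed to import numpy as np and the project's latex_gen).
#
#     df = pandas.read_csv("all_features.csv")
#     data_summary(df, ["agile", "crafted", "random"], "tables/data_summary.tex",
#                  caption_prefix="Counts per benchmark.")
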
def q5(out_file):
    data = []
    # `comps` is assumed to be a module-level list of application sub-benchmark names.
    for c in ["/home/ezulkosk/backdoors_benchmarks/" + i + "/" for i in comps] + \
             ["/home/ezulkosk/backdoors_benchmarks/agile/",
              "/home/ezulkosk/backdoors_benchmarks/crafted/",
              "/home/ezulkosk/backdoors_benchmarks/random/"]:
        sense, spec = correlate_backdoors_and_bridges(c)
        if isinstance(c, list):
            # Note: `c` is always a string in the loop above, so this branch never triggers.
            c = "application"
        else:
            c = c.strip("/").split("/")[-1]
        data.append((c, sense, spec))
    print(tabulate(data, headers=["Benchmark", "Sensitivity", "Specificity"],
                   tablefmt="latex"))
    with open(out_file, 'w') as o:
        latex_gen.insert_table(o, data,
                               headers=["Benchmark", "Sensitivity", "Specificity"],
                               caption="Bridge/BD Expt.")
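
# `correlate_backdoors_and_bridges` is defined elsewhere in this project and is
# assumed to return a (sensitivity, specificity) pair for a benchmark directory.
# For reference, given true/false positive/negative counts, the standard
# definitions are:
#
#     sensitivity = TP / (TP + FN)    # true positive rate
#     specificity = TN / (TN + FP)    # true negative rate
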
def regression(df, benchmarks, out_file, caption_prefix=None):
    """Test whether subsets of features correlate with solving time."""
    if not caption_prefix:
        caption_prefix = ""
    heterogeneous_r2 = regression_helper(
        df, benchmarks=benchmarks,
        subsets=[
            ["simp_num_vars", "simp_num_clauses", "simp_cvr"],
            ["simp_num_vars", "simp_num_clauses", "simp_num_cmtys", "simp_q"],
            ["simp_num_vars", "simp_num_clauses", "simp_lsr_size", "simp_lvr"],
            ["simp_num_vars", "simp_num_clauses", "simp_num_min_weak", "simp_weak_size"],
            ["simp_num_vars", "simp_num_clauses", "simp_backbones", "simp_backbonesvr"],
            ["simp_num_vars", "simp_num_clauses", "simp_tw_upper", "simp_tw_uppervr"]
        ],
        rotate=False, grab_all=True, ridge=False)
    data_types = ["simp_num_vars", "simp_num_clauses", "simp_cvr",   # basic
                  "simp_num_cmtys", "simp_q", "simp_qcor",           # cmty
                  "simp_lsr_size", "simp_lvr",                       # lsr
                  "simp_tw_upper", "simp_tw_uppervr"                 # tw
                  ]
    df = df[data_types + ['benchmark', 'time']]
    df = df.dropna()
    # NOTE: subsets are ordered based on significance values.
    best_combined_r2 = regression_helper(
        df, benchmarks=benchmarks,
        subsets=[
            ["simp_q", "simp_cvr", "simp_lvr", "simp_qcor", "simp_num_clauses"],
            ["simp_tw_uppervr", "simp_q", "simp_num_cmtys", "simp_tw_upper", "simp_lvr"],
            ["simp_qcor", "simp_lvr", "simp_num_clauses", "simp_lsr_size", "simp_q"],
            ["simp_num_cmtys", "simp_tw_uppervr", "simp_cvr", "simp_tw_upper", "simp_q"]
            # ["simp_num_vars", "simp_num_clauses", "simp_tw_upper", "simp_lsr_size", "simp_q", "simp_num_cmtys"],
            # ["simp_num_vars", "simp_num_clauses", "simp_tw_upper", "simp_tw_uppervr"]
        ],
        rotate=False, grab_all=True, ridge=False)
    # best_combined_r2 = regression_helper(df, benchmarks=benchmarks, subset_size_filter=5, rotate=True)
    rows = heterogeneous_r2 + [["\\hline"]] + best_combined_r2
    with open(out_file, 'w') as o:
        latex_gen.insert_table(
            o, rows, tiny=False, headers=["Feature Set"] + benchmarks,
            caption=caption_prefix +
                    " Adjusted R$^2$ values for the given features, "
                    "compared to the log of MapleCOMSPS' solving time. "
                    "The number in parentheses indicates the number of instances "
                    "that were considered in each case. The lower section considers "
                    "heterogeneous sets of features across different parameter types.",
            label="tab-regressions", tabular=True)
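
# NOTE (sketch): `regression_helper` is defined elsewhere in this project. Based on
# how it is called above and on the table caption, it is assumed to fit, per
# benchmark, a least-squares model of log solving time on each feature subset and
# to report adjusted R^2 together with the instance count. The function below is an
# illustrative stand-in for that computation, not the project's actual helper.
def _adjusted_r2_sketch(df, features, time_col="time"):
    """Adjusted R^2 of an OLS fit of log(time) on `features` (illustrative only)."""
    import numpy as np                  # local imports keep the sketch self-contained
    import statsmodels.api as sm
    sub = df[features + [time_col]].dropna()
    X = sm.add_constant(sub[features])  # design matrix with an intercept column
    y = np.log(sub[time_col])
    fit = sm.OLS(y, X).fit()
    return fit.rsquared_adj, len(sub)   # (adjusted R^2, number of instances used)
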
def average_metric_values(df, benchmarks, out_file, caption_prefix=None):
    """Do metrics look better for application instances than for random/crafted ones?"""
    if not caption_prefix:
        caption_prefix = ""
    df = df[df.benchmark.isin(benchmarks)]
    df = df[['benchmark', 'simp_lvr', 'simp_wvr', 'simp_q',
             'simp_backbonesvr', 'simp_tw_uppervr']]
    g = df.groupby("benchmark")
    res = g.aggregate('mean')
    res2 = g.aggregate('std')
    # Format each cell as "mean (std)".
    res3 = res.combine(res2, lambda x, y: [FSTR.format(i) + " (" + FSTR.format(j) + ")"
                                           for i, j in zip(x, y)])
    res3.columns = ['LSR/V', 'Weak/V', 'Q', 'Bones/V', 'TW/V']
    with open(out_file, 'w') as o:
        latex_gen.insert_table(
            o, res3.to_latex(), tabular=True, precomputed=True, tiny=False,
            caption=caption_prefix + " Mean (std. dev.) of several parameter values. ",
            label="tab-meanstd")
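
# `FSTR` (and `BIG_FSTR`, used further below) are format strings defined elsewhere
# in this module; something like FSTR = "{0:.2f}" is assumed. The
# `res.combine(res2, ...)` idiom above pairs the mean and std frames column by
# column, so a cell with mean 0.41 and std 0.07 is rendered as the string
# "0.41 (0.07)".
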
def lsr_all_decs_comparison(df, benchmarks, out_file, caption_prefix=None):
    if not caption_prefix:
        caption_prefix = ""
    df = df[df.benchmark.isin(benchmarks)].copy()  # copy before adding the derived column
    df['simp_lsr_all_decs_overlap_ratio'] = (df['simp_lsr_all_decs_intervr'] /
                                             df['simp_lsr_all_decs_unionvr'])
    df = df[['benchmark', 'simp_lvr', 'simp_all_decsvr',
             'simp_lsr_all_decs_overlap_ratio']]
    g = df.groupby("benchmark")
    res = g.aggregate('mean')
    res2 = g.aggregate('std')
    # Format each cell as "mean (std)".
    res3 = res.combine(res2, lambda x, y: [FSTR.format(i) + " (" + FSTR.format(j) + ")"
                                           for i, j in zip(x, y)])
    res3.columns = ["Laser", "All Decisions", "Overlap Ratio"]
    print(res3)
    print(res3.to_latex())
    with open(out_file, 'w') as o:
        latex_gen.insert_table(
            o, res3.to_latex(), precomputed=True, tiny=False, tabular=True,
            caption=caption_prefix +
                    " Mean (std. dev.) of Laser-produced backdoor sizes "
                    "versus all decision variables. "
                    "Overlap Ratio is the ratio of the sizes of the sets "
                    "$(Laser \\cap All Decisions)$ and $(Laser \\cup All Decisions)$.",
            label="lsr_vs_all_decs_table")
def structure_logging_summary(df, benchmarks, out_file, full=False):
    """Compare LSR measures and solving time across restart policies (Luby, always-restart, never-restart)."""
    print("Structure logging")
    out_str = ""
    df = df[df.benchmark.isin(benchmarks)]
    # 'struct_gini_normalized_picks', 'struct_ar_gini_normalized_picks', 'struct_nr_gini_normalized_picks',
    # 'struct_gini_normalized_clauses', 'struct_ar_gini_normalized_clauses', 'struct_nr_gini_normalized_clauses',
    df = df[['benchmark', 'name',
             'struct_lsr', 'struct_ar_lsr', 'struct_nr_lsr',
             'simp_maplesat_time', 'simp_maplesat_ar_time', 'simp_maplesat_nr_time',
             'simp_maplesat_conflicts', 'simp_maplesat_ar_conflicts', 'simp_maplesat_nr_conflicts',
             'struct_avg_clause_lsr', 'struct_ar_avg_clause_lsr', 'struct_nr_avg_clause_lsr']]
    df = df.dropna()
    if full:
        # ['benchmark', 'struct_gini_normalized_picks', 'struct_ar_gini_normalized_picks', 'struct_nr_gini_normalized_picks'],
        # ['benchmark', 'struct_gini_normalized_clauses', 'struct_ar_gini_normalized_clauses', 'struct_nr_gini_normalized_clauses'],
        feature_lists = [
            ['benchmark', 'struct_lsr', 'struct_ar_lsr', 'struct_nr_lsr'],
            ['benchmark', 'struct_avg_clause_lsr', 'struct_ar_avg_clause_lsr', 'struct_nr_avg_clause_lsr'],
            ['benchmark', 'simp_maplesat_conflicts', 'simp_maplesat_ar_conflicts', 'simp_maplesat_nr_conflicts'],
            ['benchmark', 'simp_maplesat_time', 'simp_maplesat_ar_time', 'simp_maplesat_nr_time']
        ]
        # 'P1: Community-based Spatial Locality of Decisions',
        # 'P2: Community-based Spatial Locality of Learnt Clauses',
        expt_name_list = ['LSR Size', 'Avg. Clause LSR', 'Num Conflicts', 'Solving Time (s)']
        best = ["min", "min", "min", "min"]
    else:
        feature_lists = [
            ['benchmark', 'struct_lsr', 'struct_ar_lsr', 'struct_nr_lsr'],
            ['benchmark', 'struct_avg_clause_lsr', 'struct_ar_avg_clause_lsr', 'struct_nr_avg_clause_lsr'],
            ['benchmark', 'simp_maplesat_conflicts', 'simp_maplesat_ar_conflicts', 'simp_maplesat_nr_conflicts'],
            ['benchmark', 'simp_maplesat_time', 'simp_maplesat_ar_time', 'simp_maplesat_nr_time']
        ]
        expt_name_list = ['LSR Size', 'Avg. Clause LSR', 'Num Conflicts', 'Solving Time (s)']
        best = ["min", "min", "min", "min"]
    end_row = " \\\\ \\hline"
    out_str += "\\begin{center}\n"
    out_str += "\\begin{tabular}{ |l|c|c|c| }\n"
    # Header row.
    out_str += "\\hline\n"
    out_str += " & ".join(["\\textbf{" + i + "}" for i in
                           ["Property", "Luby", "Always Restart", "Never Restart"]]) + end_row + "\n"
    for l, e, b in zip(feature_lists, expt_name_list, best):
        df2 = df[l]
        g = df2.groupby("benchmark")
        res = g.aggregate('mean')
        res2 = g.aggregate('std')
        # Format each cell as "mean (std)", switching to the wide format string for large values.
        res3 = res.combine(res2, lambda x, y: [
            FSTR.format(i) + " (" + FSTR.format(j) + ")" if i <= 1000
            else BIG_FSTR.format(i) + " (" + BIG_FSTR.format(j) + ")"
            for i, j in zip(x, y)])
        out_str += e + "& "
        for index, row in res3.iterrows():
            pre = ""
            post = "\\\\"
            nums = [float(row[fname].split()[0]) for fname in l[1:]]
            nums_and_std = [row[fname] for fname in l[1:]]
            # Find the best (min or max) value across the three restart policies and bold it.
            high = -1
            low = 9999999
            high_index = -1
            low_index = -1
            for i in range(len(nums)):
                if str(nums[i]) == "nan":
                    continue
                if nums[i] > high:
                    high = nums[i]
                    high_index = i
                if nums[i] < low:
                    low = nums[i]
                    low_index = i
            for i in range(len(nums)):
                if str(nums[i]) == "nan":
                    continue
                if b == "min" and low_index == i:
                    nums_and_std[low_index] = "\\textbf{" + nums_and_std[low_index] + "}"
                elif b == "max" and high_index == i:
                    nums_and_std[high_index] = "\\textbf{" + nums_and_std[high_index] + "}"
            out_str += pre + " & ".join(str(i) for i in nums_and_std) + post + "\n"
        out_str += "\\hline\n"
    out_str += "\\end{tabular}\n"
    out_str += "\\end{center}\n"
    with open(out_file, 'w') as o:
        latex_gen.insert_table(
            o, out_str, tabular=True, precomputed=True, tiny=False, label="tab_lens",
            caption="Comparison of LSR measures and solving time for various restart policies"
                    " on the Agile benchmark. LSR sizes are normalized by the number of variables.")
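
# Example driver (sketch): the input path, benchmark list, and output file names
# below are illustrative assumptions; the project's real entry point supplies its
# own values.
#
#     df = pandas.read_csv("/path/to/merged_features.csv")
#     benchmarks = ["agile", "crafted", "random"]
#     data_summary(df, benchmarks, "tables/counts.tex")
#     average_metric_values(df, benchmarks, "tables/means.tex")
#     regression(df, benchmarks, "tables/regressions.tex", caption_prefix="")
#     structure_logging_summary(df, ["agile"], "tables/restarts.tex")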