# Example #1
def data_summary(df, benchmarks, out_file, caption_prefix=None):
    """
    Write a LaTeX table counting, per benchmark, the instances with each parameter computed.

    Rows of ``df`` whose ``benchmark`` value is in ``benchmarks`` are grouped by
    benchmark and the non-null entries of every column are counted. A final
    "Total" row holds the sum of the per-benchmark counts.

    :param df: DataFrame providing at least the ``benchmark`` and ``result`` columns
        and the ``simp_*`` columns selected below.
    :param benchmarks: collection of benchmark names to keep.
    :param out_file: path of the file the LaTeX table is written to.
    :param caption_prefix: optional text placed in front of the caption.
    """
    # Local import: DataFrame.append was removed in pandas 2.0, so pd.concat is used instead.
    import pandas as pd

    if not caption_prefix:
        caption_prefix = ""

    # Copy the filtered rows so that adding the SAT column never writes into a
    # view of the caller's frame (avoids chained-assignment problems).
    df = df[df.benchmark.isin(benchmarks)].copy()
    df['SAT'] = df['result']
    df.loc[df.SAT != "SAT", ['SAT']] = np.nan

    counts = df.groupby("benchmark").aggregate('count')

    # One extra row with the column-wise sum of the per-benchmark counts.
    totals = counts.sum(numeric_only=True).to_frame().T
    res = pd.concat([counts, totals], ignore_index=True)
    res.index = list(counts.index) + ["Total"]

    # use simp_num_vars for # time instances, since maplecomsps contains some instances that get simplified away
    out = res[["simp_num_vars", "simp_lsr_size", "simp_weak_size", "simp_q", "simp_backbones", "simp_tw_upper"]]
    out.columns = ["Instances", "LSR", "Weak", "Cmty", "Bones", "TW"]

    with open(out_file, 'w') as o:
        latex_gen.insert_table(o, out.to_latex(), tabular=True, precomputed=True, tiny=False,
                               caption=caption_prefix + " The number of instances for which" +
                               " we were able to successfully compute each parameter. " +
                               "``Cmty'' refers to the community parameters; " +
                               "``TW'' denotes the treewidth upper bound; " +
                               "``Bones'' denotes backbone size. ")
# Example #2
def q5(out_file):
    """
    Run ``correlate_backdoors_and_bridges`` per benchmark directory and write a LaTeX table.

    Each row of the table holds the benchmark name together with the two
    values returned by the helper, labelled "Sensitivity" and "Specificity".
    The running table is also printed after every benchmark.

    :param out_file: path of the file the LaTeX table is written to.
    """
    base_dir = "/home/ezulkosk/backdoors_benchmarks/"
    headers = ["Benchmark", "Sensitivity", "Specificity"]

    # `comps` is not defined in this function; it is expected to exist at module level.
    # NOTE(review): the directory list in the available source was cut off after the
    # "crafted/" entry (the literal was never closed). Only the visible entries are
    # reproduced here -- re-add any further benchmark directories that were lost.
    benchmark_dirs = [base_dir + i + "/" for i in comps] + [base_dir + "agile/", base_dir + "crafted/"]

    data = []
    for c in benchmark_dirs:
        sense, spec = correlate_backdoors_and_bridges(c)
        if isinstance(c, list):
            # A list entry stands for a group of directories reported under one name.
            c = "application"
        else:
            # Use the last path component as the benchmark name.
            c = c.strip("/").split("/")[-1]
        data.append((c, sense, spec))
        print(tabulate(data, headers=headers, tablefmt="latex"))

    with open(out_file, 'w') as o:
        latex_gen.insert_table(o, data, headers=headers, caption="Bridge/BD Expt.")
# Example #3
def regression(df, benchmarks, out_file, caption_prefix=None):
    """
    Tests if subsets of features correlate with solving time.

    ``regression_helper`` is called twice: first on the unmodified ``df`` with
    one feature subset per parameter type, then on ``df`` restricted to the
    columns in ``data_types`` (plus ``benchmark`` and ``time``) with rows
    containing missing values dropped. The two result lists are joined with
    an ``\\hline`` separator row and written as one LaTeX table.

    :param df: DataFrame with the ``simp_*`` feature columns, ``benchmark`` and ``time``.
    :param benchmarks: list of benchmark names; also used as the table's column headers.
    :param out_file: path of the file the LaTeX table is written to.
    :param caption_prefix: optional text placed in front of the caption.
    """
    # Guard against None like the sibling table functions do; otherwise the
    # caption concatenation below raises TypeError.
    if not caption_prefix:
        caption_prefix = ""

    heterogeneous_r2 = regression_helper(df, benchmarks=benchmarks, subsets=[
        ["simp_num_vars", "simp_num_clauses", "simp_cvr"],
        ["simp_num_vars", "simp_num_clauses", "simp_num_cmtys", "simp_q"],
        ["simp_num_vars", "simp_num_clauses", "simp_lsr_size", "simp_lvr"],
        ["simp_num_vars", "simp_num_clauses", "simp_num_min_weak", "simp_weak_size"],
        ["simp_num_vars", "simp_num_clauses", "simp_backbones", "simp_backbonesvr"],
        ["simp_num_vars", "simp_num_clauses", "simp_tw_upper", "simp_tw_uppervr"],
    ], rotate=False, grab_all=True, ridge=False)

    data_types = ["simp_num_vars", "simp_num_clauses", "simp_cvr",  # basic
                  "simp_num_cmtys", "simp_q", "simp_qcor",  # cmty
                  "simp_lsr_size", "simp_lvr",  # lsr
                  "simp_tw_upper", "simp_tw_uppervr",  # tw
                  ]

    # Keep only instances for which every one of the features above is available.
    df = df[data_types + ['benchmark', 'time']]
    df = df.dropna()

    # NOTE: ordered based on significance values
    best_combined_r2 = regression_helper(df, benchmarks=benchmarks, subsets=[
        ["simp_q", "simp_cvr", "simp_lvr", "simp_qcor", "simp_num_clauses"],
        ["simp_tw_uppervr", "simp_q", "simp_num_cmtys", "simp_tw_upper", "simp_lvr"],
        ["simp_qcor", "simp_lvr", "simp_num_clauses", "simp_lsr_size", "simp_q"],
        ["simp_num_cmtys", "simp_tw_uppervr", "simp_cvr", "simp_tw_upper", "simp_q"],
        # Disabled subsets:
        # ["simp_num_vars", "simp_num_clauses", "simp_tw_upper", "simp_lsr_size", "simp_q", "simp_num_cmtys"],
        # ["simp_num_vars", "simp_num_clauses", "simp_tw_upper", "simp_tw_uppervr"],
    ], rotate=False, grab_all=True, ridge=False)

    # best_combined_r2 = regression_helper(df, benchmarks=benchmarks, subset_size_filter=5, rotate=True)
    rows = heterogeneous_r2 + [["\\hline"]] + best_combined_r2

    with open(out_file, 'w') as o:
        latex_gen.insert_table(o, rows, tiny=False, headers=["Feature Set"] + benchmarks,
                               caption=caption_prefix + " Adjusted R$^2$ values for the given features, "
                               + "compared to log of MapleCOMSPS' solving time. "
                               + "The number in parentheses indicates the number of instances "
                               + "that were considered in each case. The lower section considers "
                               + "heterogeneous sets of features across different parameter types.",
                               label="tab-regressions", tabular=True)
# Example #4
def average_metric_values(df, benchmarks, out_file, caption_prefix=None):
    """
    Do metrics look better for app as opposed to random/crafted?

    Groups the selected instances by benchmark and writes a LaTeX table whose
    cells read "mean (std)" for each of the five normalized parameters.

    :param df: DataFrame with ``benchmark`` and the ``simp_*`` columns selected below.
    :param benchmarks: collection of benchmark names to keep.
    :param out_file: path of the file the LaTeX table is written to.
    :param caption_prefix: optional text placed in front of the caption.
    """
    if not caption_prefix:
        caption_prefix = ""

    df = df[df.benchmark.isin(benchmarks)]
    df = df[['benchmark', 'simp_lvr', 'simp_wvr', 'simp_q', 'simp_backbonesvr', 'simp_tw_uppervr']]

    grouped = df.groupby("benchmark")
    means = grouped.aggregate('mean')
    stds = grouped.aggregate('std')

    # Merge both frames column by column into "mean (std)" strings.
    res3 = means.combine(stds, lambda x, y: [FSTR.format(i) + " (" + FSTR.format(j) + ")" for i, j in zip(x, y)])
    res3.columns = ['LSR/V', 'Weak/V', 'Q', 'Bones/V', 'TW/V']

    # NOTE(review): the call below was left unterminated in the available source
    # (dangling comma after `caption`); a trailing keyword argument may have been lost.
    with open(out_file, 'w') as o:
        latex_gen.insert_table(o, res3.to_latex(), tabular=True, precomputed=True, tiny=False,
                               caption=caption_prefix + " Mean (std. dev.) of several parameter values. ")
# Example #5
def lsr_all_decs_comparison(df, benchmarks, out_file, caption_prefix=None):
    """
    Write a LaTeX table comparing Laser backdoor sizes with all decision variables.

    An overlap ratio column is derived as
    ``simp_lsr_all_decs_intervr / simp_lsr_all_decs_unionvr``; the table then
    shows "mean (std)" per benchmark for ``simp_lvr``, ``simp_all_decsvr`` and
    that ratio.

    :param df: DataFrame with ``benchmark`` and the ``simp_*`` columns used below.
    :param benchmarks: collection of benchmark names to keep.
    :param out_file: path of the file the LaTeX table is written to.
    :param caption_prefix: optional text placed in front of the caption.
    """
    if not caption_prefix:
        caption_prefix = ""

    # Copy the filtered rows so that adding the ratio column never writes into
    # a view of the caller's frame.
    df = df[df.benchmark.isin(benchmarks)].copy()
    df['simp_lsr_all_decs_overlap_ratio'] = df['simp_lsr_all_decs_intervr'] / df['simp_lsr_all_decs_unionvr']
    df = df[['benchmark', 'simp_lvr', 'simp_all_decsvr', 'simp_lsr_all_decs_overlap_ratio']]

    grouped = df.groupby("benchmark")
    res = grouped.aggregate('mean')
    res2 = grouped.aggregate('std')

    # Same conversion for both frames: strings stay as they are, NaN stays NaN,
    # every other value becomes the string form of its float value.
    for frame in (res, res2):
        for col in frame:
            converted = []
            for val in frame[col].tolist():
                if isinstance(val, str):
                    converted.append(val)
                elif np.isnan(val):
                    converted.append(np.nan)
                else:
                    converted.append(str(float(val)))
            frame[col] = converted

    # Merge both frames column by column into "mean (std)" strings.
    res3 = res.combine(res2, lambda x, y: [FSTR.format(i) + " (" + FSTR.format(j) + ")" for i, j in zip(x, y)])
    res3.columns = ["Laser", "All Decisions", "Overlap Ratio"]

    # NOTE(review): the call below was left unterminated in the available source
    # (dangling comma after `caption`); a trailing keyword argument may have been lost.
    with open(out_file, 'w') as o:
        latex_gen.insert_table(o, res3.to_latex(), precomputed=True, tiny=False, tabular=True,
                               caption=caption_prefix + " Mean (std. dev.) of Laser produced backdoor sizes " +
                               "versus all decision variables. " +
                               "Overlap Ratio is the size of the set " +
                               "$(Laser \\cap All Decisions) / (Laser \\cup All Decisions)$.")
# Example #6
def structure_logging_summary(df, benchmarks, out_file, full=False):
    """
    Write a LaTeX table comparing LSR measures, conflicts and solving time per restart policy.

    For every property (LSR size, average clause LSR, number of conflicts,
    solving time) the per-benchmark mean and standard deviation of three
    columns are formatted as "mean (std)" under the headings "Luby",
    "Always Restart" and "Never Restart". The cell with the best mean
    (currently always the lowest) is set in bold; NaN means are never chosen.

    :param df: DataFrame with ``benchmark``, ``name`` and the ``struct_*`` /
        ``simp_maplesat_*`` columns selected below.
    :param benchmarks: collection of benchmark names to keep.
    :param out_file: path of the file the LaTeX table is written to.
    :param full: kept for backward compatibility. The feature lists are the same
        for both values because the gini-based features are disabled.
    """
    import math  # local import so the block does not depend on file-level imports

    print("Structure logging")

    df = df[df.benchmark.isin(benchmarks)]

    # Disabled feature columns:
    # 'struct_gini_normalized_picks', 'struct_ar_gini_normalized_picks', 'struct_nr_gini_normalized_picks',
    # 'struct_gini_normalized_clauses', 'struct_ar_gini_normalized_clauses', 'struct_nr_gini_normalized_clauses',
    df = df[['benchmark', 'name', 'struct_lsr', 'struct_ar_lsr', 'struct_nr_lsr',
             'simp_maplesat_time', 'simp_maplesat_ar_time', 'simp_maplesat_nr_time',
             'simp_maplesat_conflicts', 'simp_maplesat_ar_conflicts', 'simp_maplesat_nr_conflicts',
             'struct_avg_clause_lsr', 'struct_ar_avg_clause_lsr', 'struct_nr_avg_clause_lsr']]
    df = df.dropna()

    # The `full` and non-`full` configurations were identical in the original code.
    # The only difference was two disabled experiments for `full`:
    # 'P1: Community-based Spatial Locality of Decisions' (gini_normalized_picks) and
    # 'P2: Community-based Spatial Locality of Learnt Clauses' (gini_normalized_clauses).
    feature_lists = [
        ['benchmark', 'struct_lsr', 'struct_ar_lsr', 'struct_nr_lsr'],
        ['benchmark', 'struct_avg_clause_lsr', 'struct_ar_avg_clause_lsr', 'struct_nr_avg_clause_lsr'],
        ['benchmark', 'simp_maplesat_conflicts', 'simp_maplesat_ar_conflicts', 'simp_maplesat_nr_conflicts'],
        ['benchmark', 'simp_maplesat_time', 'simp_maplesat_ar_time', 'simp_maplesat_nr_time'],
    ]
    expt_name_list = [
        'LSR Size',
        'Avg. Clause LSR',
        'Num Conflicts',
        'Solving Time (s)',
    ]
    best = ["min", "min", "min", "min"]

    end_row = " \\\\ \\hline"
    header_cells = ["\\textbf{" + i + "}" for i in ["Property", "Luby", "Always Restart", "Never Restart"]]

    parts = [
        "\\begin{center}\n",
        "\\begin{tabular}{ |l|c|c|c| }\n",
        # header
        "\\hline\n",
        " & ".join(header_cells) + end_row + "\n",
    ]

    for features, expt_name, objective in zip(feature_lists, expt_name_list, best):
        grouped = df[features].groupby("benchmark")
        means = grouped.aggregate('mean')
        stds = grouped.aggregate('std')
        # "mean (std)" per cell; values above 1000 use the alternative BIG_FSTR format.
        res3 = means.combine(stds, lambda x, y: [FSTR.format(i) + " (" + FSTR.format(j) + ")"
                                                 if i <= 1000
                                                 else BIG_FSTR.format(i) + " (" + BIG_FSTR.format(j) + ")"
                                                 for i, j in zip(x, y)])
        parts.append(expt_name + "& ")
        for _, row in res3.iterrows():
            # The mean is the first whitespace-separated token of each formatted cell.
            cell_means = [float(row[fname].split()[0]) for fname in features[1:]]
            cells = [row[fname] for fname in features[1:]]

            # Only non-NaN means may be highlighted; ties go to the first column.
            candidates = [i for i, value in enumerate(cell_means) if not math.isnan(value)]
            if candidates and objective in ("min", "max"):
                pick = min if objective == "min" else max
                winner = pick(candidates, key=cell_means.__getitem__)
                cells[winner] = "\\textbf{" + cells[winner] + "}"

            parts.append(" & ".join(str(i) for i in cells) + "\\\\" + "\n")

    parts.append("\\hline\n")
    parts.append("\\end{tabular}\n")
    parts.append("\\end{center}\n")
    out_str = "".join(parts)

    with open(out_file, 'w') as o:
        latex_gen.insert_table(o, out_str, tabular=True, precomputed=True, tiny=False, label="tab_lens",
                               caption="Comparison of LSR measures and solving time for various restart policies" +
                                       " on the Agile benchmark. LSR sizes are normalized by the number of variables.")