def samples_distribution(data) -> None:
  """Plot, per generation, how many samples occur with each repetition count.

  Samples are identified by the SHA-256 of their content, so byte-identical
  samples within one generation are counted as repetitions of each other.
  """
  per_gen = {}
  # First pass: count identical samples (by content hash) within each generation.
  for dp in data:
    digest = crypto.sha256_str(dp.sample)
    bucket = per_gen.setdefault(dp.generation_id, {})
    bucket[digest] = bucket.get(digest, 0) + 1
  # Second pass: collapse hash->count maps into frequency histograms
  # (repetition count -> number of samples with that count), then into the
  # (keys, values) tuple shape the plotter expects.
  for gen_id, hash_counts in per_gen.items():
    histogram = {}
    for count in hash_counts.values():
      histogram[count] = histogram.get(count, 0) + 1
    per_gen[gen_id] = (list(histogram.keys()), list(histogram.values()))
  plt.GrouppedBars(
    groups=per_gen,  # Dict[Dict[int, int]]
    plot_name="freq_samples_per_gen",
    path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
    title="Repetition of samples per generation",
    x_name="# of repetitions",
  )
  return
def input_samples_distribution(data) -> None:
  """Plot frequency distribution of input/sample pairs per generation.

  x-axis: times occurred; y-axis: how many samples hit that frequency.
  One group of these distributions per generation.
  """
  per_gen = {}
  # Histogram each datapoint's recorded frequency, bucketed by generation.
  for dp in data:
    bucket = per_gen.setdefault(dp.generation_id, {})
    bucket[dp.frequency] = bucket.get(dp.frequency, 0) + 1
  # Reshape into the (keys, values) tuples the plotter expects.
  for gen_id, histogram in per_gen.items():
    per_gen[gen_id] = (list(histogram.keys()), list(histogram.values()))
  plt.GrouppedBars(
    groups=per_gen,  # Dict[Dict[int, int]]
    plot_name="freq_input_samples_per_gen",
    path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
    title="Repetition of input/samples pair per generation",
    x_name="# of repetitions",
  )
  return
def score_direction_categorical_distrib_per_rel_hlen(data) -> None:
  """
  Groupped bars of a) better, b) same, c) worse score, per relative hole
  length unit (percent), normalized within each score category.
  """
  groups = {
    'better score': {},
    'worse score': {},
    'same score': {},
  }
  # Per-category datapoint counts, used to normalize each category to sum 1.
  normalizers = {
    'better score': 0,
    'worse score': 0,
    'same score': 0,
  }
  for dp in data:
    try:
      # rel_hole_lengths may be missing or non-numeric; skip such datapoints,
      # as the original logic did via its broad exception handler.
      rounded = int(100 * float(dp.rel_hole_lengths))
      sd = dp.score_delta
    except Exception:
      continue
    # Infinite score deltas are unclassifiable.
    if math.isinf(sd):
      continue
    if sd > 0:
      k = 'worse score'   # positive delta: score regressed
    elif sd < 0:
      k = 'better score'  # negative delta: score improved
    else:
      k = 'same score'
    normalizers[k] += 1
    key = str(rounded)
    groups[k][key] = groups[k].get(key, 0) + 1
  # Normalize counts within each category. A category only has entries when
  # its normalizer was incremented, so division by zero cannot occur.
  for k, v in groups.items():
    for rhlk in v:
      groups[k][rhlk] = groups[k][rhlk] / normalizers[k]
  for k, v in groups.items():
    groups[k] = (list(v.keys()), list(v.values()))
  plt.GrouppedBars(
    groups=groups,  # Dict[Dict[int, int]]
    plot_name="score_cat_distrib_rel_hlen",
    path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
    title="Score Direction Distribution per Category VS Relative Hole Length",
    x_name="Size of Hole %",
  )
def KAverageScore(**kwargs) -> None:
  """
  Compare the average of top-K closest per target benchmark
  for all different database groups.

  kwargs:
    db_groups      : database groups to score (SamplesDatabase,
                     EncodedContentFiles or CLSmithDatabase).
    targets        : target benchmark suite provider.
    feature_space  : feature space used for distance computation.
    top_k          : number of closest entries averaged per benchmark.
    unique_code    : if True, deduplicate code before feature extraction.
    plot_config    : optional extra kwargs forwarded to the plotter.
    workspace_path : output directory for the plot.
  """
  db_groups = kwargs.get('db_groups')
  target = kwargs.get('targets')
  feature_space = kwargs.get('feature_space')
  top_k = kwargs.get('top_k')
  unique_code = kwargs.get('unique_code', False)
  plot_config = kwargs.get('plot_config')
  workspace_path = kwargs.get('workspace_path')
  groups = {}
  benchmarks = target.get_benchmarks(feature_space)
  # Per-benchmark normalization baseline: max of distance-from-origin and any
  # group's average distance, so relative scores below stay in [0, 100].
  target_origin_dists = {}
  for dbg in db_groups:
    if not (dbg.db_type == samples_database.SamplesDatabase
            or dbg.db_type == encoded.EncodedContentFiles
            or dbg.db_type == clsmith.CLSmithDatabase):
      raise ValueError(
        "Scores require SamplesDatabase or EncodedContentFiles but received",
        dbg.db_type)
    # Choose the feature fetcher once per group (it is loop-invariant; the
    # original rebuilt a lambda on every benchmark iteration).
    if unique_code:
      get_data = dbg.get_unique_data_features
    else:
      get_data = dbg.get_data_features
    groups[dbg.group_name] = ([], [])
    for benchmark in tqdm.tqdm(benchmarks,
                               total=len(benchmarks),
                               desc="Benchmarks"):
      groups[dbg.group_name][0].append(benchmark.name)
      # Find shortest distances.
      distances = workers.SortedDistances(get_data(feature_space),
                                          benchmark.features, feature_space)
      assert len(distances) != 0, "Sorted src list for {} is empty!".format(
        dbg.group_name)
      # Average over the entries actually present: dividing by top_k when the
      # group holds fewer than top_k entries would silently deflate the mean
      # (and the sibling comparison code divides by len as done here).
      avg_dist = sum(distances[:top_k]) / len(distances[:top_k])
      # Compute target's distance from O(0,0), keeping the largest baseline
      # seen for this benchmark across all groups.
      if benchmark.name in target_origin_dists:
        target_origin_dists[benchmark.name] = max(
          target_origin_dists[benchmark.name], avg_dist)
      else:
        target_origin_dists[benchmark.name] = max(
          math.sqrt(sum([x**2 for x in benchmark.features.values()])),
          avg_dist)
      groups[dbg.group_name][1].append(avg_dist)
  # Convert raw distances to relative improvement (%) against the baseline.
  for group_name, (bench_names, raw_dists) in groups.items():
    for idx, (bench_name, raw_dist) in enumerate(zip(bench_names, raw_dists)):
      groups[group_name][1][idx] = 100 * (
        (target_origin_dists[bench_name] - raw_dist) /
        target_origin_dists[bench_name])
  plotter.GrouppedBars(
    groups=groups,
    plot_name="avg_{}_dist_{}_{}".format(
      top_k, feature_space.replace("Features", " Features"),
      '-'.join([dbg.group_name for dbg in db_groups])),
    title="{}".format(feature_space.replace("Features", " Features")),
    path=workspace_path,
    **plot_config if plot_config else {},
  )
  return
def score_vs_token_delta(data) -> None:
  """
  Bar plot: frequency (%) of the combinations of score-delta direction
  (better / worse / same) and token-delta sign (>0, <0, ==0).
  """
  cats = ['token delta > 0', 'token delta < 0', 'token delta == 0']
  groups = {
    'better score': [list(cats), [0, 0, 0]],
    'worse score': [list(cats), [0, 0, 0]],
    'same score': [list(cats), [0, 0, 0]],
  }
  nsum = 0
  for dp in data:
    # Token delta: produced hole-fill length minus total absolute hole length.
    td = dp.hole_ind_length - sum(
      [int(x) for x in dp.abs_hole_lengths.split(',') if x])
    # Infinite score deltas cannot be classified; skip them. (The original's
    # `td is not None` check was dead code: td is always computed above.)
    if math.isinf(dp.score_delta):
      continue
    sd = dp.score_delta
    nsum += 1
    if sd < 0:
      key = 'better score'  # negative delta: score improved
    elif sd > 0:
      key = 'worse score'
    else:
      key = 'same score'
    if td > 0:
      groups[key][1][0] += 1
    elif td < 0:
      groups[key][1][1] += 1
    else:
      groups[key][1][2] += 1
  # Convert counts to percentages; guard empty input against ZeroDivisionError.
  if nsum:
    for k, v in groups.items():
      for idx, nv in enumerate(v[1]):
        groups[k][1][idx] = 100 * (nv / nsum)
  plt.GrouppedBars(
    groups=groups,
    plot_name="token_score_deltas",
    path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
    title="Sample Frequency % VS token & score delta",
    x_name="category",
  )
  return
def score_per_rel_hlen(data) -> None:
  """
  Groupped bars of a) better, b) same, c) worse score, per relative hole
  length unit (percent), normalized per bucket across the three categories.
  """
  groups = {
    'better score': {},
    'worse score': {},
    'same score': {},
  }
  max_abs = 0
  for dp in data:
    try:
      # rel_hole_lengths may be missing or non-numeric; skip such datapoints.
      rounded = int(100 * float(dp.rel_hole_lengths))
      sd = dp.score_delta
    except Exception:
      continue
    max_abs = max(max_abs, rounded)
    if math.isinf(sd):
      continue
    if sd > 0:
      k = 'worse score'
    elif sd < 0:
      k = 'better score'
    else:
      k = 'same score'
    key = str(rounded)
    groups[k][key] = groups[k].get(key, 0) + 1
  # Normalize each hole-length bucket to percentages across the categories.
  # Upper bound is inclusive: the largest observed bucket must be normalized
  # too (the previous range(0, max_abs) skipped str(max_abs) — off by one).
  for l in range(0, max_abs + 1):
    total = 0
    for k, v in groups.items():
      if str(l) in v:
        total += v[str(l)]
    for k, v in groups.items():
      if str(l) in v:
        groups[k][str(l)] = 100 * (v[str(l)] / total)
  for k, v in groups.items():
    groups[k] = (list(v.keys()), list(v.values()))
  plt.GrouppedBars(
    groups=groups,  # Dict[Dict[int, int]]
    plot_name="score_per_rel_hlen",
    path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
    title="Score Direction (%) per Relative Hole Length",
    x_name="Size of Hole %",
  )
def comp_vs_len_indices_over_len_input(data) -> None:
  """
  Groupped bars of compile vs not-compile counts, bucketed by the ratio
  (length of hole indices / length of encoded input), rounded to 1 decimal.
  """
  groups = {
    'compile': {},
    'not-compile': {},
  }
  for dp in data:
    try:
      # Ratio of hole-fill length to encoded-input token count; a malformed
      # or empty encoded_input_ids raises and the datapoint is logged+skipped.
      len_ratio = round(
        dp.hole_ind_length /
        len([int(x) for x in dp.encoded_input_ids.split(',') if x]), 1)
      k = 'compile' if dp.compile_status == 1 else 'not-compile'
      key = str(len_ratio)
      groups[k][key] = groups[k].get(key, 0) + 1
    except Exception as e:
      logger.error(e)
      continue
  for k, v in groups.items():
    groups[k] = (list(v.keys()), list(v.values()))
  plt.GrouppedBars(
    groups=groups,  # Dict[Dict[int, int]]
    plot_name="comp_per_indices_input_len_ratio",
    path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
    title="Compilability VS (Length of Indices / Length of Input)",
    x_name="Length of Indices / Length of Input",
  )
  return
def comp_vs_num_tokens(data) -> None:
  """
  Groupped bars of compile vs not-compile percentage per sample length
  (number of tokens).
  """
  groups = {
    'compile': {},
    'not-compile': {},
  }
  max_numtok = 0
  for dp in data:
    try:
      numtok = dp.num_tokens
      max_numtok = max(max_numtok, numtok)
      k = 'compile' if dp.compile_status == 1 else 'not-compile'
      key = str(numtok)
      groups[k][key] = groups[k].get(key, 0) + 1
    except Exception as e:
      logger.error(e)
      continue
  # Normalize each token-length bucket to percentages across the categories.
  # Upper bound is inclusive: the longest observed sample must be normalized
  # too (the previous range(0, max_numtok) skipped it — off by one).
  for l in range(0, max_numtok + 1):
    total = 0
    for k, v in groups.items():
      if str(l) in v:
        total += v[str(l)]
    for k, v in groups.items():
      if str(l) in v:
        groups[k][str(l)] = 100 * (v[str(l)] / total)
  for k, v in groups.items():
    groups[k] = (list(v.keys()), list(v.values()))
  plt.GrouppedBars(
    groups=groups,  # Dict[Dict[int, int]]
    plot_name="comp_per_len_sample",
    path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
    title="Compilability % VS Length of Sample",
    x_name="Length of Sample",
  )
  return
def comp_vs_token_delta(data) -> None:
  """
  Bar plot: frequency (%) of the 6 combinations of token-delta sign
  (>0, <0, ==0) and compilability (compile / not-compile).
  """
  groups = {
    'token delta > 0': [['compile', 'not-compile'], [0, 0]],
    'token delta < 0': [['compile', 'not-compile'], [0, 0]],
    'token delta == 0': [['compile', 'not-compile'], [0, 0]],
  }
  nsum = 0
  for dp in data:
    # Token delta: produced hole-fill length minus total absolute hole length.
    # (The original's `td is not None` check was dead: td is always computed.)
    td = dp.hole_ind_length - sum(
      [int(x) for x in dp.abs_hole_lengths.split(',') if x])
    cs = dp.compile_status
    if cs is None:
      continue
    nsum += 1
    if td > 0:
      key = 'token delta > 0'
    elif td < 0:
      key = 'token delta < 0'
    else:
      key = 'token delta == 0'
    # Slot 0 counts compiling samples, slot 1 non-compiling ones.
    groups[key][1][0 if cs == 1 else 1] += 1
  # Convert counts to percentages; guard empty input against ZeroDivisionError.
  if nsum:
    for k, v in groups.items():
      for idx, nv in enumerate(v[1]):
        groups[k][1][idx] = 100 * (nv / nsum)
  plt.GrouppedBars(
    groups=groups,
    plot_name="comp_token_delta",
    path=pathlib.Path(FLAGS.eval_cand_db).absolute().parent,
    title="Sample Frequency % VS Compilability & token delta",
    x_name="category",
  )
  return
def ExecutionTimesLocalSize() -> None:
  """
  Iterate over multiple local sizes at a fixed global size and collect the
  execution time for transferring to CPU and GPU and executing the kernel
  on CPU and GPU; report one groupped bar plot per source kernel.
  """
  global src1
  global src2
  MIN_LSIZE_POW = 0   # smallest local size tried: 2**0
  MAX_LSIZE_POW = 21  # largest local size tried: 2**21
  GSIZE_POW = 21      # fixed global size: 2**21
  N_RUNS = 10**2
  for n, src in [("src1", src1), ("src2", src2)]:
    groups = {
      'cpu_transfer': [[], []],
      'cpu_kernel': [[], []],
      'gpu_transfer': [[], []],
      'gpu_kernel': [[], []],
    }
    for lsize_pow in range(MIN_LSIZE_POW, MAX_LSIZE_POW + 1):
      print("##########", lsize_pow, 2**lsize_pow)
      # CLDrive occasionally fails and returns None; retry until it succeeds.
      # (The original wrapped this loop in a redundant `if cpt is None:`.)
      cpt, cpk, gpt, gpk = opencl.CLDriveExecutionTimes(
        src, num_runs=N_RUNS, gsize=2**GSIZE_POW, lsize=2**lsize_pow)
      while cpt is None:
        cpt, cpk, gpt, gpk = opencl.CLDriveExecutionTimes(
          src, num_runs=N_RUNS, gsize=2**GSIZE_POW, lsize=2**lsize_pow)
      print(cpt.mean(), cpk.mean(), gpt.mean(), gpk.mean())
      # NaN means the device produced no timing for that measurement.
      # The /10**6 scale presumably converts ns -> ms (y_name is "ms") —
      # TODO(review): confirm CLDriveExecutionTimes' time unit.
      if not math.isnan(cpt.mean()):
        groups['cpu_transfer'][0].append(lsize_pow)
        groups['cpu_transfer'][1].append(cpt.mean() / (10**6))
      if not math.isnan(cpk.mean()):
        groups['cpu_kernel'][0].append(lsize_pow)
        groups['cpu_kernel'][1].append(cpk.mean() / (10**6))
      if not math.isnan(gpt.mean()):
        groups['gpu_transfer'][0].append(lsize_pow)
        groups['gpu_transfer'][1].append(gpt.mean() / (10**6))
      if not math.isnan(gpk.mean()):
        groups['gpu_kernel'][0].append(lsize_pow)
        groups['gpu_kernel'][1].append(gpk.mean() / (10**6))
    plt.GrouppedBars(
      groups=groups,
      plot_name="exec_times_per_lsize",
      path=pathlib.Path("./plots/exec_times_lsize/{}".format(n)),
      x_name="power of 2",
      y_name="ms",
    )
  return
def ExecutionTimesGlobalSize() -> None:
  """
  Iterate over multiple global sizes at a fixed local size (256) and collect
  the execution time for transferring to CPU and GPU and executing the kernel
  on CPU and GPU; report one groupped bar plot per source kernel.
  """
  global src1
  global src2
  MIN_GSIZE_POW = 8
  MAX_GSIZE_POW = 28
  # Fewer repetitions at larger global sizes keep total wall time bounded.
  N_RUNS = {
    'src1': {
      2**8: 10**5, 2**9: 10**5, 2**10: 10**5, 2**11: 10**5, 2**12: 10**5,
      2**13: 10**5, 2**14: 10**4, 2**15: 10**4, 2**16: 10**4, 2**17: 10**4,
      2**18: 10**4, 2**19: 10**3, 2**20: 10**3, 2**21: 10**3, 2**22: 10**3,
      2**23: 10**2, 2**24: 10**2, 2**25: 10**1, 2**26: 10**1, 2**27: 10**1,
      2**28: 10**1,
    },
    'src2': {
      2**8: 10**5, 2**9: 10**5, 2**10: 10**5, 2**11: 10**5, 2**12: 10**5,
      2**13: 10**4, 2**14: 10**4, 2**15: 10**4, 2**16: 10**4, 2**17: 10**3,
      2**18: 10**3, 2**19: 10**3, 2**20: 10**3, 2**21: 10**2, 2**22: 10**2,
      2**23: 10**2, 2**24: 10**1, 2**25: 10**1, 2**26: 10**1, 2**27: 10**1,
      2**28: 10**1,
    }
  }
  for n, src in [("src1", src1), ("src2", src2)]:
    groups = {
      'cpu_transfer': [[], []],
      'cpu_kernel': [[], []],
      'gpu_transfer': [[], []],
      'gpu_kernel': [[], []],
    }
    for gsize_pow in range(MIN_GSIZE_POW, MAX_GSIZE_POW + 1):
      print("##########", gsize_pow, 2**gsize_pow)
      # CLDrive occasionally fails and returns None; retry until it succeeds.
      cpt, cpk, gpt, gpk = opencl.CLDriveExecutionTimes(
        src, num_runs=N_RUNS[n][2**gsize_pow], gsize=2**gsize_pow, lsize=256)
      while cpt is None:
        cpt, cpk, gpt, gpk = opencl.CLDriveExecutionTimes(
          src, num_runs=N_RUNS[n][2**gsize_pow], gsize=2**gsize_pow, lsize=256)
      print(cpt.mean(), cpk.mean(), gpt.mean(), gpk.mean())
      # BUG FIX: the original appended `lsize_pow`, a name that does not exist
      # in this function (copy-paste from ExecutionTimesLocalSize) and would
      # raise NameError on the first non-NaN measurement. The x-coordinate
      # here is the global-size power, gsize_pow.
      # NaN means the device produced no timing for that measurement; the
      # /10**6 scale presumably converts ns -> ms — TODO(review): confirm.
      if not math.isnan(cpt.mean()):
        groups['cpu_transfer'][0].append(gsize_pow)
        groups['cpu_transfer'][1].append(cpt.mean() / (10**6))
      if not math.isnan(cpk.mean()):
        groups['cpu_kernel'][0].append(gsize_pow)
        groups['cpu_kernel'][1].append(cpk.mean() / (10**6))
      if not math.isnan(gpt.mean()):
        groups['gpu_transfer'][0].append(gsize_pow)
        groups['gpu_transfer'][1].append(gpt.mean() / (10**6))
      if not math.isnan(gpk.mean()):
        groups['gpu_kernel'][0].append(gsize_pow)
        groups['gpu_kernel'][1].append(gpk.mean() / (10**6))
    plt.GrouppedBars(
      groups=groups,
      plot_name="exec_times_per_gsize",
      path=pathlib.Path("./plots/exec_times_gsize/{}".format(n)),
      x_name="power of 2",
      y_name="ms",
    )
  return
def SRCIRORVsBenchPress(**kwargs) -> None:
  """
  Compare mutec mutation tool on github's database against BenchPress.
  Comparison is similar to KAverageScore comparison.

  kwargs:
    seed           : seed database group (EncodedContentFiles or
                     CLSmithDatabase) whose code is mutated.
    benchpress     : BenchPress database group (SamplesDatabase).
    srciror_cache  : path to the SQLite cache for SRCIROR mutants/results.
    mutation_level : 'src' for source-level mutation, otherwise IR-level.
    targets        : target benchmark suite provider.
    feature_space  : feature space used for distance computation.
    top_k          : number of closest entries averaged per benchmark.
    beam_width     : beam width for the SRCIROR mutation search.
    unique_code    : if True, deduplicate code before feature extraction.
    plot_config    : optional extra kwargs forwarded to the plotter.
    workspace_path : output directory for the plot.
  """
  seed = kwargs.get('seed')
  benchpress = kwargs.get('benchpress')
  srciror_cache = kwargs.get('srciror_cache', '')
  mutation_level = kwargs.get('mutation_level')
  target = kwargs.get('targets')
  feature_space = kwargs.get('feature_space')
  top_k = kwargs.get('top_k')
  beam_width = kwargs.get('beam_width')
  unique_code = kwargs.get('unique_code', False)
  plot_config = kwargs.get('plot_config')
  workspace_path = kwargs.get('workspace_path')
  # Fail fast if the requested SRCIROR executable is not installed.
  if mutation_level == 'src':
    if not pathlib.Path(SRCIROR_SRC).exists():
      raise FileNotFoundError(
          "SRCIROR_src executable not found: {}".format(SRCIROR_SRC))
  else:
    if not pathlib.Path(SRCIROR_IR).exists():
      raise FileNotFoundError(
          "SRCIROR_IR executable not found: {}".format(SRCIROR_IR))
  if seed.db_type != encoded.EncodedContentFiles and seed.db_type != clsmith.CLSmithDatabase:
    raise ValueError(
        "Scores require EncodedContentFiles or CLSmithDatabase but received",
        seed.db_type)
  if benchpress.db_type != samples_database.SamplesDatabase:
    raise ValueError(
        "BenchPress scores require SamplesDatabase but received",
        benchpress.db_type)
  if seed.db_type == clsmith.CLSmithDatabase:
    if not pathlib.Path(CLSMITH_INCLUDE).exists():
      raise FileNotFoundError(
          "CLSMITH_INCLUDE folder does not exist: {}".format(CLSMITH_INCLUDE))
  ## Load database and checkpoint of targets.
  mutec_db = samples_database.SamplesDatabase(url="sqlite:///{}".format(
      pathlib.Path(srciror_cache).resolve()), must_exist=False)
  # Benchmarks already processed by a previous run are checkpointed in the
  # results row for this feature space, newline-separated; collect them so
  # they can be skipped below.
  done = set()
  with mutec_db.Session(commit=True) as s:
    res = s.query(samples_database.SampleResults).filter_by(
        key=feature_space).first()
    if res is not None:
      done.update([str(x) for x in res.results.split('\n')])
    s.commit()
  ## Initialize dictionary.
  # Each group maps to ([benchmark names], [relative scores %]).
  groups = {}
  groups["SRCIROR_{}".format(mutation_level)] = ([], [])
  groups[seed.group_name] = ([], [])
  groups[benchpress.group_name] = ([], [])
  ## Fix fetching data functions.
  if unique_code:
    git_get_data = lambda x: seed.get_unique_data_features(x)
    bp_get_data = lambda x: benchpress.get_unique_data_features(x)
  else:
    git_get_data = lambda x: seed.get_data_features(x)
    bp_get_data = lambda x: benchpress.get_data_features(x)
  ## Run engine on mutec.
  benchmarks = target.get_benchmarks(feature_space)
  for benchmark in tqdm.tqdm(benchmarks,
                             total=len(benchmarks),
                             desc="Benchmarks"):
    ## This has already been searched for.
    if benchmark.name in done:
      continue
    ## Tuple of closest src, distance from target benchmark.0
    # Entries appear to be (src, include, distance) triples — see the
    # unpacking below. TODO(review): confirm SortedSrcDistances' tuple shape.
    closest = workers.SortedSrcDistances(git_get_data(feature_space),
                                         benchmark.features, feature_space)
    ## IF CLsmith takes too long here, collect only features, then for the beam size go and fetch
    ## the code.
    # Split source and distances lists.
    git_dist = [x for _, _, x in closest]
    ## If distances are already minimized, nothing to do.
    if sum(git_dist[:top_k]) == 0:
      continue
    l.logger().info(benchmark.name)
    # Beam-search SRCIROR mutations over the closest seeds (only those with
    # distance > 0 can still improve) and keep the top_k best mutants.
    closest_mutec_src = beam_srciror(
        [(src, inc, dist) for src, inc, dist in closest[:beam_width] if dist > 0],
        benchmark.features, feature_space, beam_width,
        mutec_db)[:top_k]  # tuple of (src, distance)
    closest_mutec_dist = [x for _, _, x in closest_mutec_src]
    assert len(closest_mutec_dist) == len(git_dist[:top_k])
    ## If mutec has provided a better score
    if sum(closest_mutec_dist) < sum(git_dist[:top_k]):
      l.logger().info("Score reduced from {} to {}".format(
          sum(git_dist[:top_k]), sum(closest_mutec_dist)))
      l.logger().info("Best score from {} to {}".format(
          git_dist[0], closest_mutec_dist[0]))
      # Checkpoint this benchmark's name so a later rerun skips it.
      with mutec_db.Session(commit=True) as s:
        res = s.query(samples_database.SampleResults).filter_by(
            key=feature_space).first()
        if res is not None:
          res.results = res.results + "\n" + benchmark.name
        else:
          s.add(samples_database.SampleResults(key=feature_space,
                                               results=benchmark.name))
        s.commit()
      # Compute target's distance from O(0,0)
      target_origin_dist = math.sqrt(
          sum([x**2 for x in benchmark.features.values()]))
      mutec_avg_dist = sum(closest_mutec_dist) / top_k
      groups["SRCIROR_{}".format(mutation_level)][0].append(benchmark.name)
      groups["SRCIROR_{}".format(mutation_level)][1].append(
          100 * ((target_origin_dist - mutec_avg_dist) / target_origin_dist))
      # Compute target's distance from O(0,0)
      git_avg_dist = sum(git_dist[:top_k]) / top_k
      groups[seed.group_name][0].append(benchmark.name)
      groups[seed.group_name][1].append(
          100 * ((target_origin_dist - git_avg_dist) / target_origin_dist))
  ## Run engine on benchpress.
  benchmarks = target.get_benchmarks(feature_space)
  for benchmark in tqdm.tqdm(benchmarks,
                             total=len(benchmarks),
                             desc="Benchpress"):
    ## Run only for benchmarks mutec has improved.
    if benchmark.name in groups["SRCIROR_{}".format(mutation_level)][0]:
      l.logger().info(benchmark.name)
      distances = workers.SortedDistances(bp_get_data(feature_space),
                                          benchmark.features, feature_space)
      # Compute target's distance from O(0,0)
      target_origin_dist = math.sqrt(
          sum([x**2 for x in benchmark.features.values()]))
      avg_dist = sum(distances[:top_k]) / len(distances[:top_k])
      groups[benchpress.group_name][0].append(benchmark.name)
      groups[benchpress.group_name][1].append(
          100 * ((target_origin_dist - avg_dist) / target_origin_dist))
  plotter.GrouppedBars(
      groups=groups,
      plot_name="srciror_src_avg_{}_{}_{}".format(
          top_k, seed.group_name,
          feature_space.replace("Features", " Features")),
      path=workspace_path,
      **plot_config if plot_config else {},
  )
  return
def plot_speedups_with_clgen(benchmarks_data, clgen_data, suite="npb"):
  """
  Plot speedups of predictive models trained with and without clgen.

  Args:
    benchmarks_data: CSV path of benchmark-only observations.
    clgen_data: CSV path of CLgen synthetic-kernel observations.
    suite: benchmark-suite prefix used to select rows (default "npb").

  Returns speedups (without and with).
  """
  # datasets: B - benchmarks, S - synthetics, BS - benchmarks + synthetics:
  B = pd.read_csv(benchmarks_data)
  B["group"] = ["B"] * len(B)
  S = pd.read_csv(clgen_data)
  S["group"] = ["S"] * len(S)
  BS = pd.concat((B, S))
  # find the ZeroR. This is the device which is most frequently optimal
  Bmask = B[B["benchmark"].str.contains(suite)]
  zeror = Counter(Bmask["oracle"]).most_common(1)[0][0]
  zeror_runtime = "runtime_" + zeror.lower()
  # get the names of the benchmarks, in the form: $suite-$version-$benchmark
  benchmark_names = sorted(
      set([
          re.match(r"^([^0-9]+-[0-9\.]+-[^-]+)-", b).group(1)
          for b in B["benchmark"] if b.startswith(suite)
      ]))
  B_out, S_out, BS_out = [], [], []
  for benchmark in benchmark_names:
    clf = model.model()
    features = get_cgo13_features
    # cross validate on baseline
    B_out += model.leave_one_benchmark_out(clf, features, B, benchmark)
    # reset model
    clf = model.model()
    S_out += model.leave_one_benchmark_out(clf,
                                           features,
                                           BS,
                                           benchmark,
                                           synthetics=True,
                                           is_clgen=True)
    clf = model.model()
    # repeate cross-validation with synthetic kernels
    BS_out += model.leave_one_benchmark_out(clf,
                                            features,
                                            BS,
                                            benchmark,
                                            synthetics=False,
                                            is_clgen=True)
  # create results frame
  R_out = []
  for b, s, bs in zip(B_out, S_out, BS_out):
    # get runtimes of device using predicted device
    b_p_runtime = b["runtime_" + b["p"].lower()]
    s_p_runtime = s["runtime_" + s["p"].lower()]
    bs_p_runtime = bs["runtime_" + bs["p"].lower()]
    # speedup is the ratio of runtime using the predicted device
    # over runtime using ZeroR device
    b["p_speedup"] = b_p_runtime / b[zeror_runtime]
    s["p_speedup"] = s_p_runtime / s[zeror_runtime]
    bs["p_speedup"] = bs_p_runtime / bs[zeror_runtime]
    print(b_p_runtime, s_p_runtime, bs_p_runtime, b[zeror_runtime],
          s[zeror_runtime], bs[zeror_runtime])
    if "training" in benchmarks_data:
      # $benchmark
      group = escape_benchmark_name(b["benchmark"])
    else:
      # $benchmark.$dataset
      group = re.sub(r"[^-]+-[0-9\.]+-([^-]+)-.+", r"\1",
                     b["benchmark"]) + "." + b["dataset"]
    b["group"] = group
    s["group"] = group
    bs["group"] = group
    # set the training data type
    b["training"] = "Grewe et al."
    s["training"] = "Only CLgen"
    bs["training"] = "w. CLgen"
    R_out.append(b)
    R_out.append(s)
    R_out.append(bs)
  R = pd.DataFrame(R_out)
  b_mask = R["training"] == "Grewe et al."
  s_mask = R["training"] == "Only CLgen"
  bs_mask = R["training"] == "w. CLgen"
  # Mean of per-group mean speedups for each training configuration.
  B_speedup = mean(R[b_mask].groupby(["group"])["p_speedup"].mean())
  S_speedup = mean(R[s_mask].groupby(["group"])["p_speedup"].mean())
  BS_speedup = mean(R[bs_mask].groupby(["group"])["p_speedup"].mean())
  # Histogram of integer-truncated speedups per configuration.
  groups = {
      "Benchmarks": {},
      "Bench+Synth": {},
      "Synthetics": {},
  }
  R.to_csv("./investigate.csv")
  print(len(R[b_mask]["p_speedup"]))
  print(len(R[s_mask]["p_speedup"]))
  print(len(R[bs_mask]["p_speedup"]))
  for x in R[b_mask]["p_speedup"]:
    x = int(x)
    if x not in groups["Benchmarks"]:
      groups["Benchmarks"][x] = 1
    else:
      groups["Benchmarks"][x] += 1
  for x in R[bs_mask]["p_speedup"]:
    x = int(x)
    if x not in groups["Bench+Synth"]:
      groups["Bench+Synth"][x] = 1
    else:
      groups["Bench+Synth"][x] += 1
  for x in R[s_mask]["p_speedup"]:
    x = int(x)
    if x not in groups["Synthetics"]:
      groups["Synthetics"][x] = 1
    else:
      groups["Synthetics"][x] += 1
  for k, v in groups.items():
    groups[k] = (list(v.keys()), list(v.values()))
  plotter.GrouppedBars(
      groups=groups,  # Dict[Dict[int, int]]
      plot_name="speedup_distribution",
      path=pathlib.Path("."),
      title="Speedup distribution frequency",
      x_name="Speedup absolute value",
  )
  b_distr = distributions.GenericDistribution(
      [int(x) for x in R[b_mask]["p_speedup"]], "plots", "benchmarks")
  s_distr = distributions.GenericDistribution(
      [int(x) for x in R[s_mask]["p_speedup"]], "plots", "synthetics")
  bs_distr = distributions.GenericDistribution(
      [int(x) for x in R[bs_mask]["p_speedup"]], "plots",
      "synthetics_benchmarks")
  b_distr.plot()
  s_distr.plot()
  bs_distr.plot()
  print(s_distr - b_distr > 0)
  print(bs_distr - b_distr > 0)
  (s_distr - b_distr).plot()
  (bs_distr - b_distr).plot()
  print(" #. benchmarks: ", len(set(B["benchmark"])), "kernels,", len(B),
        "observations")
  print(" #. synthetic: ", len(set(S["benchmark"])), "kernels,", len(S),
        "observations")
  print()
  print(" ZeroR device: {}".format(zeror))
  print()
  print(" Speedup of Grewe et al.: {:.2f} x".format(B_speedup))
  print(" Speedup w. CLgen: {:.2f} x".format(BS_speedup))
  print(" Speedup Only CLgen: {:.2f} x".format(S_speedup))
  # NOTE(review): these divisions crash when no row belongs to group "FT.B" —
  # presumably the input suites always contain it; verify for new datasets.
  bft = [x.p_speedup for idx, x in R[b_mask].iterrows() if x.group == "FT.B"]
  sft = [x.p_speedup for idx, x in R[s_mask].iterrows() if x.group == "FT.B"]
  bsft = [
      x.p_speedup for idx, x in R[bs_mask].iterrows() if x.group == "FT.B"
  ]
  print()
  print()
  print()
  print("FT.B Grewe: {}".format(sum(bft) / len(bft)))
  print("FT.B w Clgen: {}".format(sum(bsft) / len(bsft)))
  print("FT.B Only Clgen: {}".format(sum(sft) / len(sft)))
  # Append the "Average" bars. DataFrame.append was deprecated in pandas 1.4
  # and removed in 2.0; pd.concat is the supported equivalent and behaves the
  # same with ignore_index=True.
  R = pd.concat(
      [
          R,
          pd.DataFrame([
              {  # average bars
                  "group": "Average",
                  "p_speedup": B_speedup,
                  "training": "Grewe et al."
              },
              {
                  "group": "Average",
                  "p_speedup": BS_speedup,
                  "training": "w. CLgen"
              },
          ])
      ],
      ignore_index=True)
  R["p_speedup"] -= 1  # negative offset so that bars start at 1
  # colors
  palette = sns.cubehelix_palette(len(set(R["training"])),
                                  rot=-.4,
                                  light=.85,
                                  dark=.35)
  ax = sns.barplot(x="group",
                   y="p_speedup",
                   data=R,
                   ci=None,
                   hue="training",
                   palette=palette)
  plt.ylabel("Speedup")
  plt.xlabel("")
  plt.axhline(y=0, color="k", lw=1)  # speedup line
  plt.axvline(x=plt.xlim()[1] - 1, color="k", lw=1,
              linestyle="--")  # average line
  ax.get_legend().set_title("")  # no legend title
  plt.legend(loc='upper right')
  ax.get_legend().draw_frame(True)
  # plot shape and size
  figsize = (3 * 9, 3 * 2.2)
  if "nvidia" in benchmarks_data:
    typecast = int
    plt.ylim(-1, 16)
  elif "training" in benchmarks_data:
    typecast = float
    figsize = (3 * 7, 3 * 3.2)
  else:
    typecast = float
  # counter negative offset:
  ax.set_yticklabels([typecast(i) + 1 for i in ax.get_yticks()])
  plt.setp(ax.get_xticklabels(), rotation=90)
  Finalize(output="plot.png", figsize=figsize, tight=True)
  return B_speedup, BS_speedup