def test_custom_models():
    """Register a decoupled custom search model and tune three variables.

    The model queries the tuning history and returns a proposal dict;
    the weight factor balances proposals from multiple search engines.
    """
    @ut.model(name="random", weight=0.5)
    def random_prop(vars, history):
        # Re-draw until the proposal is not a duplicate of a past trial.
        while True:
            proposal = {
                key: 1 if key == "v1" else scope.random()
                for key, scope in vars.iterate()
            }
            if not history.duplicate(proposal):
                return proposal

    v1 = ut.tune(3, (1, 9), name="v1")
    v2 = ut.tune(5, (2, ut.vars.v1), name="v2")
    v3 = ut.tune(5, (2, 7), name="v3")

    # register co-variates
    ut.register(random.randint(1, 10), name="temp")
    ut.target(v1 + v2 + v3 * 2, "max")
def test_constraints():
    """Limit the search space with a rule that pins v3 in outlier regions."""
    @ut.rule(name="remove_outlier")
    def rule(vars):
        # Small v1 combined with large v2 is treated as an outlier:
        # force v3 to a fixed value for those configurations.
        if vars.v1 < 5 and vars.v2 > 5:
            vars.v3 = 10

    v1 = ut.tune(3, (1, 9), name="v1")
    v2 = ut.tune(5, (2, ut.vars.v1), name="v2")
    v3 = ut.tune(5, (2, 7), name="v3")

    # register co-variates
    ut.register(random.randint(1, 10), name="temp")
    ut.target(v1 + v2 + v3 * 2, "max")
def main(parse_only=False):
    """Run one Quartus/ACL compile with a tuned EDA option set.

    Tunes the option pool via uptune, runs configuration and execution in
    daemonized child processes, copies the reports into a per-trial folder,
    parses the achieved kernel fmax (MHz), and registers it as a
    maximization target. A missing or unparsable report yields -inf.
    """
    # Create an EDA option pool
    if not parse_only:
        cleanup()
    option = OrderedDict()
    for key, values in options.items():
        option[key] = ut.tune(values[0], values, name=key)

    # If the design folder is symbolic
    if os.path.islink(design):
        os.unlink(design)

    # Configure and execute in parallel worker processes
    t1 = Process(target=config, args=(option, ))
    t2 = Process(target=execute, args=(design, ))
    t1.daemon = True
    t2.daemon = True
    t1.start()
    t2.start()
    t1.join()
    t2.join()

    qor = 0
    index = "default"
    work_path = os.path.abspath("./")

    # TODO: Parse the data
    # We just copy the rpt to separate folder
    if os.getenv("UT_TUNE_START"):
        index = ut.get_global_id()
        work_path = os.path.join(ut.get_meta_data("UT_WORK_DIR"), "ut-work-dir")
        index = "ut-rpt-{}".format(index)

    rpt_folder = os.path.join(work_path, str(index))
    cmd = "mkdir -p {}; ".format(rpt_folder)
    cmd += "cd {}; ".format(design)
    cmd += "cp acl_quartus_report.txt quartus_sh_compile.log *json *rpt *qsf {}; cp ../*log {}".format(
        rpt_folder, rpt_folder)
    run_process(cmd)

    # Read frequency
    rpt = "{}/acl_quartus_report.txt".format(rpt_folder)
    if os.path.isfile(rpt):
        with open(rpt, "r") as fp:
            content = str(fp.readlines())
        match = re.search(r"Kernel fmax: (\d+\.\d+)", content)
        if match:
            qor = float(match[1])
        else:
            # BUG FIX: re.search returns None when the report exists but
            # contains no fmax line; previously match[1] raised TypeError.
            print("Cannot parse fmax from acl quartus report...")
            qor = -1 * float("inf")
    else:
        print("Cannot find acl quartus report...")
        qor = -1 * float("inf")

    # Remove temp in profiling phase
    if os.getenv("UT_BEFORE_RUN_PROFILE"):
        cleanup()

    # Set the target
    ut.target(qor, "max")
def test_query():
    """Limit search results to satisfy user-supplied conditions."""
    @ut.constraint(name="limit")
    def requirements(vars):
        # Each entry must hold for a configuration to be accepted.
        conditions = [
            vars.temp > 2,
            # BUG FIX: was `var.v1` (NameError at evaluation time);
            # the parameter is named `vars`.
            vars.v1 + vars.v3 < 5,
        ]
        return conditions

    v1 = ut.tune(3, (1, 9), name="v1")
    v2 = ut.tune(5, (2, ut.vars.v1), name="v2")
    v3 = ut.tune(5, (2, 7), name="v3")

    # register a co-variate derived from a random draw
    a = random.randint(0, 10)
    ut.register(a * 2, name="temp")
    ut.target(v1 + v2 - v3, "max")
import uptune as ut
import random
import subprocess
import re

# resub -K 6 -l 2
aig_pth = "i10.aig"
abc_opt_pool = ['balance', 'rewrite', 'resub', 'refactor', 'rewrite -z', 'refactor -z']

# Draw 24 optimization passes; a `resub` pass additionally carries a tuned
# -K cut size. Both tune() calls run every iteration to keep the search
# space shape fixed.
syn_all = []
for _ in range(24):
    opt_idx = ut.tune(0, (0, 5), name='int_range')
    step = abc_opt_pool[opt_idx]
    cut_size = ut.tune(6, [6, 8, 10, 12])
    if step == 'resub':
        step += ' -K ' + str(cut_size)
    syn_all.append(step)


def abc_synthesis_flow(aig_pth="i10.aig"):
    """Assemble the abc shell command running the tuned pass sequence."""
    passes = "".join(step + ";" for step in syn_all)
    command = "abc -c \"read %s;" % aig_pth + passes
    command += "if -K 6;print_stats\" > abc_if_res.log"
    return command


# Print the flow without the trailing redirection, then run it.
print(abc_synthesis_flow()[:-16])
process = subprocess.Popen(abc_synthesis_flow(), shell=True)
process.wait()
import uptune as ut

# Tunable inputs; the names registered with the tuner stay "x", "y", "a", "b".
vx = ut.tune(2, (2, 15), name="x")
vy = ut.tune(5, (2, 12), name="y")
va = ut.tune(2, (2, 15), name="a")
vb = ut.tune(5, (2, 12), name="b")

# Expected causal graph: (x, y) feed xy, (a, b) feed ab.
term_xy = vx * vy + vx * vx
term_ab = va * va + vb * vb + va * vb
objective = term_ab - term_xy

# Expose the intermediate nodes as features, then maximize the difference.
ut.feature(term_ab, "ab")
ut.feature(term_xy, "xy")
ut.target(objective, "max")
def main(parse_only=False):
    """Drive one Vitis/Vivado hardware build with tuned options.

    The QoR is the effective clock period implied by the tuned frequency
    minus the post-route WNS; a missing timing report yields +inf. The
    value is registered as a minimization target.
    """
    # Create an EDA option pool
    option = OrderedDict()
    if not parse_only:
        cleanup()
    for key, values in options.items():
        option[key] = ut.tune(values[0], values, name=key)
    config(option)

    # Run the build in a daemonized child process
    pwd = os.getcwd()
    builder = Process(target=execute, args=(pwd,))
    builder.daemon = True
    builder.start()
    builder.join()

    # Extract QoR result
    qor = 0
    index = "default"
    work_path = os.path.abspath("./")

    # TODO: Parse the data
    # We just copy the rpt to separate folder
    if os.getenv("UT_TUNE_START"):
        index = ut.get_global_id()
        work_path = os.path.join(ut.get_meta_data("UT_WORK_DIR"), "ut.temp")
        index = "ut.rpt.{}".format(index)
    rpt_folder = os.path.join(work_path, str(index))

    cmd = "mkdir -p {}; cp build_dir.hw.xilinx_u280_xdma_201920_1/reports/link/imp/* {}"\
        .format(rpt_folder, rpt_folder)
    run_process(cmd)
    # cp the vivado log and config files
    cmd = "cp build_dir.hw.xilinx_u280_xdma_201920_1/link/vivado/vpl/vivado.log {}; cp *config.ini {}"\
        .format(rpt_folder, rpt_folder)
    run_process(cmd)

    # Read frequency
    rpt = "{}/xilinx_u280_xdma_201920_1_bb_locked_timing_summary_postroute_physopted.rpt".format(rpt_folder)
    if os.path.isfile(rpt):
        with open(rpt, "r") as fp:
            content = fp.readlines()
        # WNS/TNS sit six lines below the "Design Timing Summary" header.
        for pos, line in enumerate(content):
            if "Design Timing Summary" in line:
                numbers = content[pos + 6].strip().split()
                wns = float(numbers[0])
                tns = float(numbers[1])  # parsed but currently unused
                qor = (1000 / float(option["Frequency"])) - wns
                break
    else:
        print("Cannot find vivado timing report...")
        qor = float("inf")

    # Remove temp in profiling phase
    if os.getenv("UT_BEFORE_RUN_PROFILE"):
        cleanup()

    # Set the target
    ut.target(qor, "min")
def test_async_execution():
    """Sleep for a tuned duration so long jobs exercise async evaluation."""
    delay = ut.tune(10, (0, 20), name="stall")
    time.sleep(delay)
    ut.target(delay, "max")
end = time.time() total += float(end - start) * 1000 return old_div(total, trials) def run_baselines(): print("baseline perfs -O0={}ms -O1={}ms -O2={}ms -O3={}ms".format( *[run_with_flags(['-O%d' % i]) for i in range(4)])) # run_baselines() # ----------------- # Run the autotuning # ----------------- options = dict() options['-O'] = ut.tune(3, (0, 3), name="-O") for flag in found_cc_flags: options[flag] = ut.tune('default', ['on', 'off', 'default'], name=flag) for param in working_params: defaults = param_defaults[param] if defaults['max'] <= defaults['min']: defaults['max'] = float('inf') defaults['max'] = min(defaults['max'], max(1, defaults['default']) * args.scaler) defaults['min'] = max(defaults['min'], old_div(max(1, defaults['default']), args.scaler)) if param == 'l1-cache-line-size': # gcc requires this to be a power of two or it internal errors options[param] = 2**ut.tune(defaults['default'], (2, 8), name=param)
#!/usr/bin/env python
#
# test case - permutation parameter
#
import uptune as ut

dataset = ["p01_d.txt", "att48_d.txt", "p01_s.txt"]
data = "data/" + dataset[0]

# Load the distance matrix: one row of whitespace-separated ints per line.
# FIX: use a context manager so the file handle is closed deterministically
# (the original `open(data).readlines()` leaked the handle).
with open(data) as fp:
    distance = [[int(i) for i in line.split()] for line in fp]


def route_distance(p):
    """Return the total length of visiting the cities in order `p`.

    FIX: renamed from `eval`, which shadowed the Python builtin.
    """
    return sum(distance[p[i]][p[i + 1]] for i in range(len(p) - 1))


# Tune over permutations of all city indices.
default = list(range(len(distance)))
p = ut.tune(default, name="perm")

# return the distance
ret = ut.target(route_distance(p))
#!/usr/bin/env python
import uptune as ut
import subprocess

# Quartus synthesis/fitter knobs and their tuned value pools.
option_dict = dict()
option_dict["auto_dsp_recognition"] = ut.tune('On', ['On', 'Off'])
option_dict["disable_register_merging_across_hierarchies"] = ut.tune('Auto', ['On', 'Off', 'Auto'])
option_dict["mux_restructure"] = ut.tune('Auto', ['On', 'Off', 'Auto'])
option_dict["optimization_technique"] = ut.tune('Balanced', ['Area', 'Speed', 'Balanced'])
option_dict["synthesis_effort"] = ut.tune('Auto', ['Auto', 'Fast'])
option_dict["synth_timing_driven_synthesis"] = ut.tune('On', ['On', 'Off'])
option_dict["fitter_aggressive_routability_optimization"] = ut.tune('Automatically', ['Always', 'Automatically', 'Never'])
option_dict["fitter_effort"] = ut.tune('Auto Fit', ['Standard Fit', 'Auto Fit'])
option_dict["remove_duplicate_registers"] = ut.tune('On', ['On', 'Off'])
option_dict["physical_synthesis"] = ut.tune('Off', ['On', 'Off'])

# Generate options.tcl that has the flag assignments
cfg = ""
for k, v in option_dict.items():
    cfg += 'set_global_assignment -name \"' + k + '\" \"' + v + '\"\n'
# FIX: write through a context manager so the handle is closed even if
# the write raises (was open/write/close).
with open('options.tcl', 'w') as f:
    f.write(cfg)

# Invoke Quartus Pro
subprocess.Popen('quartus_sh -t ./run.tcl', shell=True).wait()

# Parse slack
def get_timing(workdir, stage):
    slack , tns = 'None', 'None'
    f = open(workdir + '/Systolic_Array_8x8.sta.' + stage + '.summary', 'r')
start = time.time() code = execute(output_dir) end = time.time() total += float(end - start) * 1000 return old_div(total, trials) def run_baselines(run_args): print("baseline perfs -O0={}ms -O1={}ms -O2={}ms -O3={}ms".format( *[run_with_flags(['-Xptxas -O%d,-v' % i], run_args=run_args) for i in range(4)])) # GEMM Benckmark (Dimension + Blocksize + Cache) run_args = " 1024 32 2" # run_baselines(run_args) # ----------------- # Run the autotuning # ----------------- options = dict() for flag in NVCC_FLAGS + PTXAS_FLAGS + NVLINK_FLAGS: options[flag] = ut.tune('on', ['on', 'off'], name=flag) params = {**PTXAS_PARAMS, **NVCC_PARAMS} for param, space in params.items(): options[param] = ut.tune(space[0], space, name=param) cmd = make_command(options) runtime = run_with_flags([], cmd, run_args) print("Runtime {}".format(runtime)) ut.target(runtime, "min")
import time
import uptune as ut

# First tuning stage: three knobs, objective 2*a + c (b is drawn but
# does not enter the objective).
a = ut.tune(1, (2, 109))
b = ut.tune(1, (3, 999))
c = ut.tune(1, (4, 239))
res = ut.target(2 * a + c)

# Simulate a long-running phase between the two objectives.
time.sleep(10)

# Second tuning stage: three more knobs; the objective reuses `a`
# from the first stage.
d = ut.tune(1, (5, 89))
e = ut.tune(1, (6, 909))
f = ut.tune(1, (2, 1299))
val = ut.target(2 * f + a)
import uptune as ut

# define the design search space: two letters, one float, one int
alphabet = list(map(chr, range(ord('a'), ord('a') + 26)))
a = ut.tune('a', alphabet)
b = ut.tune('c', alphabet)
c = ut.tune(0.2, (1.0, 9.0), name="c")
d = ut.tune(2, (1, 9), name="d")

# Inspect the named proxies on the uptune module, then constrain c*d.
print(ut.c, type(ut.d))
ut.constraint(ut.c * ut.d < 9)

res = ut.target(c * hash(a) - d * hash(b))