def run(self, desired_result, input, limit):
    cfg = desired_result.configuration.data
    if self._manipulator.validate(cfg):
        cmd = self._cmd
        for i in range(self._solvers):
            cmd += ' ' + str(cfg[self._names + str(i)])
        result = self.call_program(cmd, limit=self._timeout)['time']
    else:
        result = self._timeout
    return Result(time=result)

def run(self, desired_result, input, limit): """ Mess around with stuff, try to figure it out """ cfg = desired_result.configuration.data level = self.args.level # Should return 0 if OK. Returns a higher number the more invalid the setup is. # For example, 4 if it has 4 unsatisfied requirements, and 7 if it has 7 unsatisfied # requirements. valid_status = self.tree.evalRequirements(cfg, level) if valid_status != 0: return Result(time=valid_status, state='ERROR') res = self.tree.evalTree(cfg) return Result(time=-res)
def run(self, desired_result, input, limit):
    cfg = desired_result.configuration.data
    # Export each tuned parameter through the environment variable that SuperLU_DIST reads.
    env_map = [('NSUP', 'nsup'), ('NREL', 'nrel'), ('COLPERM', 'colperm'),
               ('NUM_LOOKAHEADS', 'lookahead'), ('OMP_NUM_THREADS', 'numthreads')]
    for env_name, cfg_name in env_map:
        os.environ[env_name] = '{0}'.format(cfg[cfg_name])
        try:
            print env_name + " = " + os.environ[env_name]
        except KeyError:
            print "No environment variable set for " + env_name
            sys.exit(1)

    # 'time' is the performance measure currently used.
    superlu_run = 'mpirun -n 16 pddrive -r 4 -c 4 '
    datafile = os.getenv("HOME")
    datafile += '/Research/DataFiles/Ga19As19H42/Ga19As19H42.rb'
    superlu_run += datafile
    print superlu_run

    run_res = self.call_program(superlu_run)
    assert run_res['returncode'] == 0
    print run_res['stdout']
    # 'time' is a built-in metric in OpenTuner, and it is minimized.
    return Result(time=run_res['time'])

def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ cfg = desired_result.configuration.data f = open(self.filename, mode="w") for i, param in enumerate(self.params): if param[0] == "None": f.write(param[1] + "\n") continue f.write(str(cfg[i]) + "\n") f.close() print(cfg) run_result = self.call_program(self.run_cmd, limit=self.args.test_timeout) print(run_result) if run_result['returncode'] != 0: return Result(time=float('inf')) return Result(time=run_result['time'])
def run(self, desired_result, input, limit): """ Run training with particular hyperparameters and see how goo the performance is """ cfg = desired_result.configuration.data print("Running with config: ", cfg) result = run_tuning(cfg, self.args.config) print("Config: ", cfg, "\nResult: ", result) return Result(time=-result)
def test_result_relative(self):
    obj = MinimizeTimeAndResource()
    # ((actual.time, actual.size), (expected.time, expected.size)): expected relative result
    test_cases = {
        ((2, 3), (2, 3)): 1,
        ((2.0, 3.0), (2.0, 3.0)): 1,
        ((2.0, 3.0), (1.0, 3.0)): 2,
        ((2.0, 3.0), (2.0, 2.0)): 1.5,
        ((2.0, 3.0), (10.0, 3.0)): 0.2,
        ((2.0, 3.0), (2.0, 30)): 0.1,
        ((0, 0), (0, 0)): float('nan'),
        ((0, 1), (0, 0)): float('inf'),
    }
    for k, v in test_cases.items():
        a = Result(time=k[0][0], size=k[0][1])
        b = Result(time=k[1][0], size=k[1][1])
        if isnan(float(v)):
            self.assertTrue(isnan(float(obj.result_relative(a, b))))
        else:
            self.assertEqual(obj.result_relative(a, b), v)

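# The test above pins down the expected behaviour of result_relative() without showing
# its implementation. The sketch below is NOT OpenTuner's actual code; it is a minimal
# hypothetical implementation that is consistent with every test case above: compare by
# the time ratio, and fall back to the size ratio when the times tie, yielding inf/nan
# instead of raising on zero denominators.
def result_relative_sketch(a, b):
    """Return a relative quality of result a versus result b (illustrative only)."""
    def ratio(x, y):
        # Division that yields nan for 0/0 and inf for x/0 instead of raising.
        try:
            return x / float(y)
        except ZeroDivisionError:
            return float('nan') if x == 0 else float('inf')

    if a.time == b.time:
        return ratio(a.size, b.size)  # tie-break on the secondary resource metric
    return ratio(a.time, b.time)
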
def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ param_list = [ 'INT_WIDTH_FEATURE', 'FRA_WIDTH_FEATURE', # 'INT_WIDTH_DATA', # 'FRA_WIDTH_DATA', 'INT_WIDTH_TMP', 'FRA_WIDTH_TMP', ] int_list = [ # 'INT_WIDTH_DATA2', 'INT_WIDTH_LABEL' # 'INT_WIDTH_LABEL2' ] tot_list = [ 'TOT_WIDTH_FEATURE', # 'TOT_WIDTH_DATA', 'TOT_WIDTH_TMP' ] cfg = desired_result.configuration.data result_id = desired_result.id # dump cfg in tcl f = open('./options.tcl', 'w') f.write('set CFLAGS "') for index in range(2): param_int = param_list[2*index] param_fra = param_list[2*index+1] value_tot = int(cfg[param_int]) + int(cfg[param_fra]) f.write(' -D' + param_int + '=' + str(cfg[param_int])) f.write(' -D' + tot_list[index] + '=' + str(value_tot)) f.write(' -D' + int_list[0] + '=' + str(cfg[int_list[0]])) f.write('"\n') f.close() cmd = 'vivado_hls ./csim.tcl' #try: run_result = self.call_program(cmd) assert run_result['returncode'] == 0 result, metadata = self.get_qor() #except: # result = 100000 # metadata = [0,0,0,0,0] self.dumpresult(cfg, result, metadata) return Result(time = result)
def compile(self, cfg, id): """ Compile and run a given configuration then return performance """ run_dir = os.path.join(os.getcwd(), 'job{0}'.format(str(id))) nodes = 1 # cluster nodes ppn = 1 # processor cores wtime_limit = limit # call compile command on cluster if self.gcc_cmd != '': newcmd = self.gcc_cmd.format(**cfg) job_id = self.cluster.submit(newcmd, run_dir, nodes, ppn, wtime_limit) logging.info('tunerID={0} jobID={1} CompileCommand={2}'.format(id, job_id, newcmd)) job_state = None while job_state == None or job_state == "QUEUED" or job_state == "RUNNING": time.sleep(1) job_state = self.cluster.get_state(job_id)[0] assert job_state == "DONE" # call run command on cluster newrun = self.run_cmd.format(**cfg) newrun = "/usr/bin/time -o clocks -f %e "+newrun job_id = self.cluster.submit(newrun, run_dir, nodes, ppn, wtime_limit) logging.info('tunerID={0} jobID={1} RunCommand={2}'.format(id, job_id, newrun)) job_state = None while job_state == None or job_state == "QUEUED" or job_state == "RUNNING": # waiting for cluster time.sleep(1) job_state = self.cluster.get_state(job_id)[0] if job_state.find('-10') == -1: assert job_state == "DONE" #t = self.cluster.get_state(job_id)[1] # get time of run time_file = open("job{0}/clocks".format(str(id)), "r") t = float(time_file.read()) logging.info('tunerID={0} Runtime={1}'.format(id, t)) shutil.rmtree('job{0}'.format(str(id))) return Result(time=t) else: return Result(time=wtime_limit + 1)
def run_precompiled(self, desired_result, input, limit, compile_result, result_id): """ Run the given desired_result SEQUENTIALLY on input and produce a Result() Abort early if limit (in seconds) is reached Assume that the executable to be measured has already been compiled to an executable corresponding to identifier id by compile() The compile_result is the return result of compile(), and it will be None if compile() was not called """ opt_seq = '' cfg = desired_result.configuration.data for flag in OPT_FLAGS: if cfg[flag] == 'on': opt_seq += ' {0}'.format(flag) tmp_dir = self.get_tmpdir(result_id) matcher = args.matcher if (matcher == ''): matcher = home + '/Github//validating-binary-decompilation/source/build/bin//matcher' matcher_run_cmd = '{0} --file1 {1}/test.mcsema.opt.ll:{2} --file2 {1}/test.proposed.opt.ll:{2} --potential-match-accuracy'.format( matcher, tmp_dir, args.func) matcher_run_result = self.call_program(matcher_run_cmd) if matcher_run_result['returncode'] != 0: print(matcher_run_result['stderr']) assert 0 matcher_stderr = matcher_run_result['stderr'] z = re.findall(r"^Accuracy:(\d+\.[\deE+-]+)", matcher_stderr, re.MULTILINE) cost = 1 - float(z[0]) log.debug('[RunPreC] Cost:{0} [{1}]'.format(cost, opt_seq)) # Early exit outfile = args.outdir + '/' + 'normalizer_final_config.json' if cost == 0: log.info( "run_precompiled: Early Exit: Optimal pass sequence written to {0}: [{1}]" .format(outfile, opt_seq)) #shutil.rmtree("./tmp") # os.remove("opentuner.log") # with open(outfile, 'a') as fd: fd.write('{0}\n'.format(opt_seq)) return Result(time=cost)
def lud_command(self, desired_result, cfg, input, limit):
    cpu_cmd = './lud -p 0 -d 0 --type 0 -x {0} -- -s 4096'.format(cfg['local1D'])
    gpu_cmd = './lud -p 1 -d 0 --type 1 -x {0} -- -s 4096'.format(cfg['local1D'])  # currently unused
    run_result = self.call_program(cpu_cmd)
    assert run_result['returncode'] == 0
    # Sum the instrumented kernel times recorded in the LSB trace file.
    time = self.get_kernel_time('lsb.lud.r0', 'diagonal_kernel')
    time += self.get_kernel_time('lsb.lud.r0', 'perimeter_kernel')
    time += self.get_kernel_time('lsb.lud.r0', 'internal_kernel')
    remove('lsb.lud.r0')
    return Result(time=time)

def run_precompiled(self, desired_result, input, limit, compile_result, id):
    """Run a binary produced by compile() sequentially and return performance."""
    assert compile_result['returncode'] == 0
    try:
        run_result = self.call_program('./tmp{0}.bin'.format(id))
        assert run_result['returncode'] == 0
    finally:
        self.call_program('rm ./tmp{0}.bin'.format(id))
    return Result(time=run_result['time'])

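# run_precompiled() above assumes a compile() counterpart that built ./tmp{id}.bin and
# returned call_program()'s result dict. The sketch below is a hypothetical minimal
# counterpart for illustration only; the source file name and the BLOCK_SIZE parameter
# are assumptions, not taken from the original project.
def compile(self, cfg, id):
    """Compile one configuration into ./tmp{id}.bin and return call_program()'s dict."""
    gcc_cmd = 'g++ mmm_block.cpp -DBLOCK_SIZE={0} -o ./tmp{1}.bin'.format(
        cfg['BLOCK_SIZE'], id)
    return self.call_program(gcc_cmd)
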
def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ cfg = desired_result.configuration.data CFLAGS = '-fopenmp -fPIC -O3 -std=c99 -Wall -pedantic -Wshadow -Wno-unused ' CFLAGS += '-DPLASMA_WITH_MKL -DMKL_Complex16="double _Complex" -DMKL_Complex8="float _Complex" ' INC = '-I/home/pedro/plasma_autotuner/include ' INC += '-I/home/pedro/plasma_autotuner/test ' INC += '-I/opt/intel/compilers_and_libraries_2016.3.210/linux/mkl/include ' LIBS = '-L/opt/intel/compilers_and_libraries_2016.3.210/linux/mkl/lib -lmkl_intel_lp64 -lmkl_core -lmkl_sequential -lm ' LIBS += '-L/home/pedro/plasma_autotuner/lib -lplasma -lcoreblas ' gcc_cmd = 'gcc -c {0} {1} {2} -D{3}={4} dccrb2cm.c -o dccrb2cm.o && '.format( INC, CFLAGS, LIBS, 'TILE_SIZE', cfg['TILE_SIZE']) gcc_cmd += 'gcc -c {0} {1} {2} -D{3}={4} dcm2ccrb.c -o dcm2ccrb.o && '.format( INC, CFLAGS, LIBS, 'TILE_SIZE', cfg['TILE_SIZE']) gcc_cmd += 'gcc -c {0} {1} {2} -D{3}={4} core_dgemm.c -o core_dgemm.o && '.format( INC, CFLAGS, LIBS, 'TILE_SIZE', cfg['TILE_SIZE']) gcc_cmd += 'gcc -c {0} {1} {2} -D{3}={4} pdgemm.c -o pdgemm.o && '.format( INC, CFLAGS, LIBS, 'TILE_SIZE', cfg['TILE_SIZE']) gcc_cmd += 'gcc -c {0} {1} {2} -D{3}={4} dgemm.c -o dgemm.o && '.format( INC, CFLAGS, LIBS, 'TILE_SIZE', cfg['TILE_SIZE']) gcc_cmd += 'gcc -c {0} {1} {2} -D{3}={4} core_dsyrk.c -o core_dsyrk.o && '.format( INC, CFLAGS, LIBS, 'TILE_SIZE', cfg['TILE_SIZE']) gcc_cmd += 'gcc -c {0} {1} {2} -D{3}={4} pdsyrk.c -o pdsyrk.o && '.format( INC, CFLAGS, LIBS, 'TILE_SIZE', cfg['TILE_SIZE']) gcc_cmd += 'gcc -c {0} {1} {2} -D{3}={4} dsyrk.c -o dsyrk.o && '.format( INC, CFLAGS, LIBS, 'TILE_SIZE', cfg['TILE_SIZE']) gcc_cmd += 'gcc dcm2ccrb.o dccrb2cm.o core_dgemm.o pdgemm.o dgemm.o core_dsyrk.o pdsyrk.o dsyrk.o {0} {1} {2} -D{3}={4} test_opentuner.c -o ./tmp.bin'.format( INC, CFLAGS, LIBS, 'TILE_SIZE', cfg['TILE_SIZE']) #gcc_cmd = 'gcc -c -fopenmp mmm_block.cpp' #gcc_cmd += ' -D{0}={1}'.format('TILE_SIZE',cfg['TILE_SIZE']) #gcc_cmd += ' -o ./tmp.bin' #print(gcc_cmd) compile_result = self.call_program(gcc_cmd) assert compile_result['returncode'] == 0 run_cmd = './tmp.bin' run_result = self.call_program(run_cmd) assert run_result['returncode'] == 0 return Result(time=run_result['time'])
def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ cfg = desired_result.configuration.data jvm_cmd = self.cfg_to_cmd(cfg, args.source) run_result = self.call_program(jvm_cmd) assert run_result['returncode'] == 0 if self.min_time < 0 or run_result['time'] < self.min_time: self.min_time = run_result['time'] return Result(time=run_result['time'])
def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ cfg = desired_result.configuration.data result_id = desired_result.id # acquire configuration index = cfg['index'] sample_run = "python ./sample.py " + str(index) run_result = self.call_program(sample_run) assert run_result['returncode'] == 0 result = self.get_qor() self.dumpresult(cfg, result) return Result(time=result)
def run(self, desired_result, input, limit):
    # Combine the tuned parameters with the parameters held fixed for this search.
    allParamVals = copy.deepcopy(desired_result.configuration.data)
    allParamVals.update(self.stableParams)
    print(allParamVals)
    minAcc = math.inf
    maxTime = 0
    # Run the program several times and keep the worst accuracy and the worst time.
    for run in range(self.tuneRuns):
        sys.stdout.write('.')
        sys.stdout.flush()
        output = self.runner(AxProf.defaultInputFileName, allParamVals)
        acc = self.accMetric(output['input'], output['acc'], allParamVals)
        time = output['time']
        if acc < minAcc:
            minAcc = acc
        if time > maxTime:
            maxTime = time
    print(maxTime, minAcc)
    return Result(time=maxTime, size=1, accuracy=minAcc)

def run(self, desired_result, input, limit): """ Runs a program for given configuration and returns the result """ jar_path = self.arg_dict.get( ArgumentParser.JAR_PATH_ARG_NAME) program_conf = self.arg_dict.get( ArgumentParser.PROGRAM_CONF_ARG_NAME, "") fixed_args = self.arg_dict.get( ArgumentParser.FIXED_SPARK_PARAM, "") # This config dict is keyed by the program flag. See # manipulator(). cfg_data = desired_result.configuration.data log.info("Config dict: " + str(cfg_data)) # Extract all SparkParamType objects from map # Seems strange making a SparkParamType out of a value but it helps # maintain a consistent interface to SparkSubmitCmd tuner_cfg = {flag: self.param_dict[flag].make_param( cfg_data[flag]) for flag in cfg_data} # TODO figure out appropriate defaults spark_submit = SparkSubmitCmd({}, {}, fixed_args) # make_cmd() expects only dicts of flags to SparkParamType as input run_cmd = spark_submit.make_cmd( jar_path, program_conf, self.param_dict, tuner_cfg) log.info(run_cmd) run_result = self.call_program(run_cmd) # TODO differentiate between config errors and errors due to # insufficient resources log.debug(str(run_result)) assert run_result[MeasurementInterfaceExt.RETURN_CODE] == 0, \ run_result[MeasurementInterfaceExt.STDOUT] # Log process performance metrics. metric_time = run_result[SparkMetrics.SECS] metric_mem_mb = Util.ratio(run_result[SparkMetrics.MEM_SECS], float(metric_time)) metric_vcores = Util.ratio(run_result[SparkMetrics.VCORE_SECS], float(metric_time)) log.info("Application metrics: time=%0.3fs, mem=%0.3fmb, vcores=%0.3f" % (metric_time, metric_mem_mb, metric_vcores)) return Result(time=metric_time, size=metric_mem_mb)
def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ cfg = desired_result.configuration.data result_id = desired_result.id # acquire configuration cmd = 'abc -c \"read dut.blif;strash;' for param_id in range(5): cmd += cfg['param_' + str(param_id)] + ';' cmd += 'if -a -K 6;print_stats \" > qor.txt' run_result = self.call_program(cmd) assert run_result['returncode'] == 0 result = self.get_qor() self.dumpresult(cfg, result) return Result(time=result)
def run(self, desired_result, input, limit):
    self.testnum += 1
    cfg = desired_result.configuration.data
    # self.call_program('true')
    score = self.maxscore
    nonMatches = 0
    lastNonMatch = None
    for p in sorted(cfg.keys()):
        # mark in trace
        self.trace[p][cfg[p]] = 1
        if cfg[p] == self.goals[p]:
            score -= self.weight[p]
        else:
            nonMatches += 1
            lastNonMatch = p

    if self.args.show_diffs and self.prevcfg is not None:
        print '%d:' % (self.testnum),
        for p in sorted(cfg.keys()):
            if cfg[p] != self.prevcfg[p]:
                print p,
        print

    if score < self.best:
        self.best = score
        print 'score=%d, (%d wrong) after %d tests' % (score, nonMatches, self.testnum),
        if lastNonMatch is not None:
            print ', highest=%s, weight %d' % (lastNonMatch, self.weight[lastNonMatch]),
        if False and self.bestcfg is not None:
            print ', changes:',
            for p in sorted(cfg.keys()):
                if cfg[p] != self.bestcfg[p]:
                    print '%s,' % p,
        print
        self.bestcfg = cfg
        if lastNonMatch is None:
            print 'reached perfect score, exiting'
            sys.exit(0)

    self.prevcfg = cfg
    return Result(time=score)

def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ cfg = desired_result.configuration.data gcc_cmd = 'g++ mmm_block.cpp ' gcc_cmd += ' -D{0}={1}'.format('BLOCK_SIZE',cfg['BLOCK_SIZE']) gcc_cmd += ' -o ./tmp.bin' compile_result = self.call_program(gcc_cmd) assert compile_result['returncode'] == 0 run_cmd = './tmp.bin' run_result = self.call_program(run_cmd) assert run_result['returncode'] == 0 return Result(time=run_result['time'])
def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ cfg = desired_result.configuration.data gcc_cmd = 'g++ Matrix_Multiplication.cpp ' gcc_cmd += '-DBLOCK_SIZE=' + str(cfg['blockSize']) gcc_cmd += ' -O{0}'.format(cfg['opt_level']) gcc_cmd += ' -o ./tmp.bin' compile_result = self.call_program(gcc_cmd) # assert compile_result['returncode'] == 0 run_cmd = './tmp.bin' run_result = self.call_program(run_cmd) # assert run_result['returncode'] == 0 return Result(time=run_result['time'])
def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ cfg = desired_result.configuration.data nruns = max(self.args.nruns, 1) begin = max(self.args.begin, self.mintilesize) end = max(self.args.end, self.mintilesize) m = random.randint(begin, end) n = random.randint(begin, end) if (self.args.tight): ldi = ldo = m else: ldi = max(random.randint(begin, end), m) ldo = max(random.randint(begin, end), m) kind = ["COPY", "ZERO"][self.args.zero] run_cmd = ( "CHECK=0 " + # no checks and only LIBXSMM measurement ["ZERO=0", "ZERO=1"][self.args.zero] + " LIBXSMM_M" + kind + "_M=" + str(self.granularity * cfg["M"]) + " LIBXSMM_M" + kind + "_N=" + str(self.granularity * cfg["N"]) + " ./matcopyf " + str(m) + " " + str(n) + " " + str(ldi) + " " + str(ldo) + " " + str(nruns)) + " " + str(self.args.nmb) run_result = self.call_program(run_cmd) if (0 == run_result["returncode"]): match = re.search( "LIBXSMM \\(" + kind.lower() + "\\):\\s+([0-9]+(\\.[0-9]*)*)", str(run_result["stdout"])) assert (match is not None) bandwidth = float(match.group(1)) assert (0 < bandwidth) kernelsize = (self.granularity**2) * cfg["M"] * cfg["N"] return Result(time=1 / bandwidth, accuracy=bandwidth, size=kernelsize) else: sys.tracebacklimit = 0 raise RuntimeError("Execution failed for \"" + run_cmd + "\"!")
def run(self, desired_result, input, limit): """Run a single config.""" del input # Unused del limit # Unused self.run_count += 1 try: # Run opt to produce an optimized bitcode file. cmd = [ self.opt, self.unoptimized_path, "-o", self.tmp_optimized_path, ] cmd += self.serialize_flags(desired_result.configuration.data) subprocess.check_call(cmd, timeout=300, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) if not Path(self.tmp_optimized_path).is_file(): return Result(time=float("inf")) except (subprocess.CalledProcessError, subprocess.TimeoutExpired): return Result(time=float("inf")) # We need to jump through a couple of hoops to optimize for runtime # using OpenTuner. Replace the environment benchmark with the current # optimized file. Use the same benchmark protocol buffer so that any # dynamic configuration is preserved. if self.target == OptimizationTarget.RUNTIME: try: new_benchmark = self.env.benchmark new_benchmark.proto.program.uri = f"file:///{self.tmp_optimized_path}" self.env.reset(benchmark=new_benchmark) return Result( time=float(np.median(self.env.observation.Runtime()))) except (ServiceError, TimeoutError): return Result(time=float("inf")) try: return Result(time=float( compute_observation(self.observation_space, self.tmp_optimized_path))) except (ValueError, TimeoutError): return Result(time=float("inf"))
def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ cfg = desired_result.configuration.data gcc_cmd = 'g++ apps/raytracer.cpp -o ./tmp.bin' gcc_cmd += ' -O{0}'.format(cfg['opt_level']) for flag in GCC_FLAGS: if cfg[flag] == 'on': gcc_cmd += ' -f{0}'.format(flag) elif cfg[flag] == 'off': gcc_cmd += ' -fno-{0}'.format(flag) for param, min, max in GCC_PARAMS: gcc_cmd += ' --param {0}={1}'.format(param, cfg[param]) compile_result = self.call_program(gcc_cmd) assert compile_result['returncode'] == 0 run_result = self.call_program('./tmp.bin') assert run_result['returncode'] == 0 return Result(time=run_result['time'])
def run(self, desired_result, input, limit):
    """Compile and run a given configuration then return performance."""
    log = logging.getLogger(__name__)
    cfg = desired_result.configuration.data
    gcc_cmd = 'g++ mmm_block.cpp '
    gcc_cmd += ' -D{0}={1}'.format('BLOCK_SIZE', cfg['BLOCK_SIZE'])
    # Append the tuned -f / -fno- flags and the tuned --param values.
    for flag in self.enum_param:
        if cfg[flag[0]] == 'on':
            gcc_cmd += ' -f{0}'.format(flag[0])
        elif cfg[flag[0]] == 'off':
            gcc_cmd += ' -fno-{0}'.format(flag[0])
    for i in self.int_param:
        gcc_cmd += ' --param {0}={1}'.format(i[0], cfg[i[0]])
    # logging.debug(gcc_cmd)
    gcc_cmd += ' -o ./tmp.bin'
    compile_result = self.call_program(gcc_cmd)
    assert compile_result['returncode'] == 0
    run_cmd = './tmp.bin'
    run_result = self.call_program(run_cmd)
    assert run_result['returncode'] == 0
    return Result(time=run_result['time'])

def run(self, desired_result, input, limit):
    cfg = desired_result.configuration.data
    solver = SOLVERS[0]
    cmd = CMD
    cmd += INSTANCE_FILE + BENCHMARK + CONFIG
    # Append one tuned value per instance; advance to the next parameter after each
    # chunk of CHUNK_SIZE instances.
    j = 0
    for i in range(INSTANCES):
        cmd += ' ' + str(cfg["instances"][j])
        if ((i > 0 and j < len(cfg["instances"]) - 1 and i % CHUNK_SIZE == 0)
                or (i == 0 and CHUNKS == INSTANCES)):
            j += 1
    run_result = self.call_program(cmd, limit=TIMEOUT)
    if run_result['timeout']:
        result = TIMEOUT
    else:
        result = run_result['time']
    stdout = run_result['stdout']
    stdout_time = float(stdout.split("Time: ")[1])
    return Result(time=stdout_time)

def run_precompiled(self, desired_result, inp, limit, compile_result, result_id):
    if compile_result.state != 'OK':
        return compile_result

    log.info("Running configuration %d...", result_id)
    output_path = self.output_root + "/Build_{0:04d}".format(result_id)

    # SDSoC 2017.1 is not loading symbols from the ELF file properly, so we have to
    # obtain the address of the exit function ourselves. Otherwise, we could just have
    # used the command "bpadd -addr &exit" in TCL.
    symbols = subprocess.check_output(['nm', output_path + '/' + self.target_file])
    exit_address = re.search(r'^(\S+) T exit$', symbols, re.MULTILINE).group(1)

    TCL_script = output_path + '/Run.tcl'
    with open(TCL_script, 'w') as script_file:
        script_file.write('''\
connect
source {output_path}/_sds/p0/ipi/zed.sdk/ps7_init.tcl
targets -set -nocase -filter {{name =~"APU*" && jtag_cable_name =~ "Digilent Zed*"}} -index 0
rst -system
after 3000
targets -set -filter {{jtag_cable_name =~ "Digilent Zed*" && level==0}} -index 1
fpga -file {target_file}.bit
targets -set -nocase -filter {{name =~"APU*" && jtag_cable_name =~ "Digilent Zed*"}} -index 0
loadhw {output_path}/_sds/p0/ipi/zed.sdk/zed.hdf
ps7_init
ps7_post_config
targets -set -nocase -filter {{name =~ "ARM*#0" && jtag_cable_name =~ "Digilent Zed*"}} -index 0
dow {target_file}
bpadd -addr 0x{exit_address}
con -block
'''.format(output_path=output_path, target_file=self.target_file,
           exit_address=exit_address))

    run_script = output_path + '/Run.sh'
    with open(run_script, 'w') as script_file:
        script_file.write('''\
#!/bin/bash -e
source {sdsoc_root}/settings64.sh
cd {output_path}
stty -F {serial_device} {serial_baudrate} raw
cat {serial_device} > Serial_output.log &
sdx -batch -source {TCL_script}
kill $!
'''.format(sdsoc_root=self.sdsoc_root, output_path=output_path,
           serial_device=self.serial_device, serial_baudrate=self.serial_baudrate,
           TCL_script=TCL_script))
    os.chmod(run_script, os.stat(run_script).st_mode | stat.S_IXUSR)

    try:
        run_result = self.call_program('ssh ' + self.fpga_host + ' ' + run_script,
                                       limit=self.run_timeout)
    except OSError:
        return Result(state='RE?', msg='Unknown error while running.')

    with open(output_path + '/Run_output.log', 'w') as log_file:
        log_file.write(run_result['stdout'])
    with open(output_path + '/Run_error.log', 'w') as log_file:
        log_file.write(run_result['stderr'])

    if run_result['returncode'] != 0 and run_result['timeout']:
        log.error('Run timeout on configuration %d', result_id)
        return Result(state='RTO', msg='Timeout while running.')

    # Parse the serial log: every line must either report success or the cycle count.
    test_failed = True
    cycles = float('inf')
    with open(output_path + '/Serial_output.log', 'r') as output_file:
        for line in output_file:
            if line == "TEST PASSED\r\n":
                test_failed = False
            else:
                match = re.match(r'The hardware test took (\S+) cycles.\r\n', line)
                if match is not None:
                    cycles = match.group(1)
                else:
                    log.error('Serial port produced invalid output on configuration %d.',
                              result_id)
                    return Result(state='RE2')

    if run_result['returncode'] != 0 or test_failed:
        log.error('Run error on configuration %d', result_id)
        return Result(state='RE1')

    log.info("Run of configuration %d was successful...", result_id)
    return Result(state='OK', msg='Test successful.', time=cycles)

def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ # cfg = desired_result.configuration.data while True: # Configuration: {'kernel': 0, 'gpuId': 0, 'config': 'gx:1024, gy:1, gz:1, bx:1, by:1, bz:1, ', 'funcId': 7} configuration = desired_result.configuration.data print "Configuration: ", configuration cfg = { match.group(1): match.group(2) for match in re.finditer(r"([^:]+):(\S+)\s*,[ ']", configuration['config']) } print "CFG: ", cfg confBlock = int(cfg['bx']) * int(cfg['by']) * int(cfg['bz']) confGrid = int(cfg["'gx"]) * int(cfg['gy']) * int(cfg['gz']) config = confBlock * confGrid print "ConfBlock " + str(confBlock) print "ConfGrid " + str(confGrid) if ((confBlock <= 1024) and (confBlock % 32 == 0) and (config == n)): break else: return Result(time=FAIL_PENALTY) # print "desired: " + str(desired_result.configuration.data) # print "CFG: ", cfg print "compiled: ", 'true' if compiled else 'false' if not compiled: print "Compiling the program..." gcc_cmd = 'nvcc -I /usr/local/cuda/include -L /usr/local/cuda/lib64 -ccbin=g++-4.9 src/sumvector.cu -lcuda -lm -o sumvector-cuda' compile_result = self.call_program(gcc_cmd) assert compile_result['returncode'] == 0 print " OK.\n" global compiled compiled = not compiled run_cmd = 'nvprof --metrics inst_executed ./sumvector-cuda' #print "TESTE:" + " " + str(cfg['gx']) + " " + str(cfg['gy']) + " " + str(cfg['gz']) + str(cfg['bx']) + " " + str(cfg['by']) + " " + str(cfg['bz']) # confBlock = cfg['bx'] * cfg['by'] * cfg['bz'] # confGrid = cfg['gx'] * cfg['gy'] * cfg['gz'] # config = confBlock * confGrid # print "confBlock: ", confBlock # print "confGrid: ", confGrid #print "config: ", config # Evict kernel divergence, blocks with multiply warp size. #print "Test: ", "True" if((confBlock <= 1024) and (confBlock % 32 == 0)) else "False" print "Antes do IF" if ((confBlock <= 1024) and (confBlock % 32 == 0) and (config == n)): dimBlock = 0 dimGrid = 0 # Test of quantity of block dimensions are used. # a if test else b dimBlock += 1 if (int(cfg['bx']) > 1) else 0 dimBlock += 1 if (int(cfg['by']) > 1) else 0 dimBlock += 1 if (int(cfg['bz']) > 1) else 0 if (dimBlock == 0): dimBlock = 1 # Test of quantity of grid dimensions are used. dimGrid += 1 if (int(cfg["'gx"]) > 1) else 0 dimGrid += 1 if (int(cfg['gy']) > 1) else 0 dimGrid += 1 if (int(cfg['gz']) > 1) else 0 if (dimGrid == 0): dimGrid = 1 if (dimGrid == 1): cfg['funcId'] = dimGrid + dimBlock - 2 if (dimGrid == 2): cfg['funcId'] = dimGrid + dimBlock + 0 if (dimGrid == 3): cfg['funcId'] = dimGrid + dimBlock + 2 run_cmd += ' {0}'.format(configuration['kernel']) run_cmd += ' {0}'.format(cfg["'gx"]) run_cmd += ' {0}'.format(cfg['gy']) run_cmd += ' {0}'.format(cfg['gz']) run_cmd += ' {0}'.format(cfg['bx']) run_cmd += ' {0}'.format(cfg['by']) run_cmd += ' {0}'.format(cfg['bz']) run_cmd += ' {0}'.format(configuration['n']) run_cmd += ' {0}'.format(configuration['gpuId']) print "Running command line: ", run_cmd #print "CFG->funcId: " + str(cfg['funcId']) run_result = self.call_program(run_cmd) if run_result['returncode'] != 0: return Result(time=FAIL_PENALTY) else: val = self.get_metric_from_app_output(run_result['stderr']) return Result(time=val) else: print "Invalid configuration, return penalty." # FAIL_PENALTY = FAIL_PENALTY - 1 return Result(time=FAIL_PENALTY)
def compile(self, config_data, result_id): log.info("Building configuration %d...", result_id) output_path = self.output_root + "/Build_{0:04d}".format(result_id) os.mkdir(output_path) defines = '' for param, value in config_data.items(): if param == 'DATA_MOVER_CLOCK': data_mover_clock = str(value) elif param == 'ACCELERATOR_1_CLOCK': accelerator_1_clock = str(value) elif param == 'ACCELERATOR_2_CLOCK': accelerator_2_clock = str(value) elif param == 'ACCELERATOR_1_UNCERTAINTY': accelerator_1_uncertainty = str(value) elif param == 'ACCELERATOR_2_UNCERTAINTY': accelerator_2_uncertainty = str(value) else: defines += ' -D{0}={1}'.format(param, value) build_script = output_path + '/Build.sh' with open(build_script, 'w') as script_file: script_file.write('''\ #!/bin/bash -e Exit_handler() {{ EXIT_VALUE=$? [ ${{EXIT_VALUE}} == 124 ] && echo "Build timed out." exit ${{EXIT_VALUE}} }} trap Exit_handler exit source "$SDSOC_ROOT/settings64.sh" export HLS_TUNER_ROOT={hls_tuner_root} "$HLS_TUNER_ROOT/Scripts/Monitor.sh" "timeout {build_timeout}s \\ make -f {make_file} clean all \\ JOBS={max_jobs} \\ THREADS={max_threads} \\ HLS_TUNER_DEFINES='{defines}' \\ HLS_TUNER_DATA_MOVER_CLOCK={data_mover_clock} \\ HLS_TUNER_ACCELERATOR_1_CLOCK={accelerator_1_clock} \\ HLS_TUNER_ACCELERATOR_2_CLOCK={accelerator_2_clock}" \\ Monitor.log.gz '''.format(hls_tuner_root=self.hls_tuner_root, build_timeout=self.build_timeout, make_file=self.make_file, max_jobs=self.max_jobs, max_threads=self.max_threads, defines=defines, data_mover_clock=data_mover_clock, accelerator_1_clock=accelerator_1_clock, accelerator_2_clock=accelerator_2_clock)) with open(output_path + '/Filter_hor.tcl', 'w') as script_file: script_file.write('set_clock_uncertainty ' + accelerator_1_uncertainty + '%\n') with open(output_path + '/Filter_ver.tcl', 'w') as script_file: script_file.write('set_clock_uncertainty ' + accelerator_2_uncertainty + '%\n') with open(self.make_file, 'r') as file_handle: data = file_handle.read() self.target_file = re.search(r'^main-build: (\S+)', data, re.MULTILINE).group(1) for attempt in range(0, 5): if attempt > 0: log.info("Repeating build of configuration %d...", result_id) backup_path = output_path + '/Attempt_' + str(attempt) os.mkdir(backup_path) for filename in os.listdir(output_path): if not filename.startswith( 'Attempt_') and not filename == 'Build.sh': os.rename(output_path + '/' + filename, backup_path + '/' + filename) if not self.fake_build: build_result = self.run_on_grid(result_id, output_path, build_script, '-q \'70s*\' -now y') if self.grid_unavailable(build_result): log.info('No 70s are available. Configuration %d will fall back to' \ ' icsafe machines.', result_id) build_result = self.run_on_grid(result_id, output_path, build_script, '-q \'!60s*\'') if self.grid_unavailable(build_result): log.info( 'No icsafe machines are available. 
Configuration %d will' ' fall back to 60s.', result_id) build_result = self.run_on_grid(result_id, output_path, build_script, '') else: shutil.copy(self.fake_build_source + '/Build_output.log', output_path) shutil.copy(self.fake_build_source + '/' + self.target_file, output_path) shutil.copy( self.fake_build_source + '/' + self.target_file + '.bit', output_path) os.makedirs(output_path + '/_sds/p0/ipi/zed.sdk') shutil.copy( self.fake_build_source + '/_sds/p0/ipi/zed.sdk/ps7_init.tcl', output_path + '/_sds/p0/ipi/zed.sdk') shutil.copy( self.fake_build_source + '/_sds/p0/ipi/zed.sdk/zed.hdf', output_path + '/_sds/p0/ipi/zed.sdk') build_result = {'returncode': 0} try: with open(output_path + '/Build_output.log', 'r') as log_file: lines = log_file.read() except: code = 'LE?' return code if re.search(r'Build timed out.', lines) != None: result = Result(state='BTO', msg='Build timed out.') elif re.search(r'\[Place 30-640\]', lines) != None: result = Result(state='BE0', msg='Too many LUTs') elif re.search(r'\[SCHED 204-80\]', lines) != None: result = Result(state='BE1', msg='Dependency error') elif re.search(r'\[XFORM 203-504\]', lines) != None: result = Result(state='BE2', msg='Too much unrolling') elif re.search(r'\[XFORM 203-1403\]', lines) != None: result = Result(state='BE3', msg='Too many load/store instructions') elif re.search(r'\[Timing 38-282\]', lines) != None: result = Result(state='TIMING', msg='Timing constraints not met') elif re.search(r'\[Timing 38-246\]', lines) != None: result = Result(state='BE5', msg='Thread error') elif re.search(r'\[Common 17-179\]', lines) != None: result = Result(state='BE9', msg='Fork failed.') elif re.search(r'Scripts Generated : progress 0%', lines) != None: result = Result( state='BE6', msg='Unknown error at 0% of bitstream generation') elif re.search(r'Moving function[^\n]*\n[^\n]*failed$', lines, re.MULTILINE) != None: result = Result(state='BE7', msg='Unknown error while moving function') elif re.search(r'This may take some time[^\n]*\n[^\n]*failed$', lines, re.MULTILINE) != None: result = Result(state='BE4', msg='Unknown error while generating bitstream') elif re.search(r'Finished building target:', lines) == None: result = Result(state='BE?', msg='Unknown build error') else: result = Result(state='OK', msg='Build was successful.') log.info("Attempt %d: %s (%s)", attempt, result.msg, result.state) if not result.state in ['BE4', 'BE5', 'BE6', 'BE7', 'BE9', 'BE?']: break if result.state == 'OK': log.info("Build of configuration %d was successful...", result_id) elif result.state == 'BTO': log.error("Build timeout on configuration %d", result_id) elif result.state == 'TIMING': log.error('Timing not met on configuration %d', result_id) else: log.error("Build error on configuration %d", result_id) return result
def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ newRatio = desired_result.configuration.data['NewRatio'] print("command line arguments are: \n threads: {0} \n newratio: {1} \n maximum heap size: {2}".format(threads, newratio, Xmx)) gcc_cmd = 'docker exec -d merge_container java -Xmx{} -jar -XX:+UnlockDiagnosticVMOptions -XX:+LogCompilation '.format( Xmx) gcc_cmd += '-XX:NewRatio={} '.format(newRatio) gcc_cmd += '-XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/mnt/gc{}.log /mnt/merge_sort_service.jar '.format(time.time()) self.call_program(gcc_cmd) print("Prime service up and running...") if os.path.exists( "/home/nayananga/Desktop/wos2/jmeter/volume/results/results_threads_{0}/results_threads_{0}.jtl".format( threads)): os.remove( "/home/nayananga/Desktop/wos2/jmeter/volume/results/results_threads_{0}/results_threads_{0}.jtl".format( threads)) os.remove( "/home/nayananga/Desktop/wos2/jmeter/volume/results/results_threads_{0}/results_threads_{0}-measurement.jtl".format( threads)) os.remove( "/home/nayananga/Desktop/wos2/jmeter/volume/results/results_threads_{0}/results_threads_{0}-warmup.jtl".format( threads)) time.sleep(20) print("JMeter test started in container...") run_cmd = 'docker exec jmeter_container java -jar /apache-jmeter-5.2.1/bin/ApacheJMeter.jar -n -t /mnt/MergeSortTestPlan.jmx -Jthreads={0} -l /mnt/results/results_threads_{0}/results_threads_{0}.jtl -q /mnt/user.properties '.format( threads) self.call_program(run_cmd) split_cmd = "java -jar /home/nayananga/Desktop/wos2/jtl-splitter-0.4.1-SNAPSHOT.jar -f /home/nayananga/Desktop/wos2/jmeter/volume/results/results_threads_{0}/results_threads_{0}.jtl -s -t 5".format( threads) self.call_program(split_cmd) t1 = pd.read_csv( '/home/nayananga/Desktop/wos2/jmeter/volume/results/results_threads_{0}/results_threads_{0}-measurement.jtl'.format( threads)) run_result = t1.elapsed.mean() if os.path.exists( "/home/nayananga/Desktop/wos2/jmeter/volume/results/results_threads_{0}/results_threads_{0}.jtl".format( threads)): os.remove( "/home/nayananga/Desktop/wos2/jmeter/volume/results/results_threads_{0}/results_threads_{0}.jtl".format( threads)) os.remove( "/home/nayananga/Desktop/wos2/jmeter/volume/results/results_threads_{0}/results_threads_{0}-measurement.jtl".format( threads)) os.remove( "/home/nayananga/Desktop/wos2/jmeter/volume/results/results_threads_{0}/results_threads_{0}-warmup.jtl".format( threads)) print("average response time is : {}".format(run_result)) save_to_history(newRatio, run_result) return Result(time=run_result)
def run(self, desired_result, input, limit): """ Compile and run a given configuration then return performance """ map_param = [ 'effort', 'ignore_carry_buffers', 'ignore_cascade_buffers', 'optimize', 'state_machine_encoding' ] fit_param = [ 'effort', 'one_fit_attempt', 'optimize_io_register_for_timing', 'pack_register', 'tdc' ] cfg = desired_result.configuration.data result_id = desired_result.id top_module = self.top_module target_family = self.target_family target_device = self.target_device f = open('./options.tcl', 'w') f.write('execute_module -tool map -args "--family=' + target_family + ' --part=' + target_device + ' ') for param in map_param: f.write('--' + param + '=' + cfg['map_' + param] + ' ') f.write('"\n') f.write('execute_module -tool fit -args "--part=' + target_device + ' ') for param in fit_param: f.write('--' + param + '=' + cfg['fit_' + param] + ' ') f.write('"\n') f.close() if hasattr(self, 'sweep'): sweep = self.sweep genfile = self.genfile if len(sweep) != 0: # generate verilog design file; this is to integrate the libcharm genverilog scripts sweepparam = int(sweep[0][1]) sweeparg_str = "" for arg in sweep: sweeparg_str = sweeparg_str + arg[1] + ' ' genveri = 'cd design; python ' + genfile + ' ' + sweeparg_str + '; cd ..' subprocess.Popen(genveri, shell=True).wait() # Replace the top module name in tcl file tclmodcmd = 'sed \'s/TOPMODULE/' + top_module + '/g\' run_quartus.tcl > run_quartus_sweep.tcl' subprocess.Popen(tclmodcmd, shell=True).wait() print "Starting " + str(sweepparam) cmd = 'quartus_sh -t ./run_quartus_sweep.tcl' #cmd = 'ls' run_result = self.call_program(cmd) assert run_result['returncode'] == 0 result, metadata = self.get_qor() self.dumpresult(cfg, result, metadata) cleanupcmd = 'rm run_quartus_sweep.tcl' subprocess.Popen(cleanupcmd, shell=True).wait() print "Finished " + str(sweepparam) else: tclmodcmd = 'sed -e \'s:TOPMODULE:' + top_module + ':g\' ' + 'run_quartus.tcl > run.tcl' subprocess.Popen(tclmodcmd, shell=True).wait() cmd = 'quartus_sh -t ./run.tcl' run_result = self.call_program(cmd) assert run_result['returncode'] == 0 result, metadata = self.get_qor() self.dumpresult(cfg, result, metadata) return Result(time=result)