def run(self):
    tempdir = tempfile.mkdtemp()
    getLogger().info("Temp directory: {}".format(tempdir))
    info = self._getInfo()
    frameworks = getFrameworks()
    assert getArgs().framework in frameworks, \
        "Framework {} is not supported".format(getArgs().framework)
    framework = frameworks[getArgs().framework](tempdir)
    bcollector = BenchmarkCollector(framework, getArgs().model_cache)
    benchmarks = bcollector.collectBenchmarks(info,
                                              getArgs().benchmark_file)
    platforms = getPlatforms(tempdir)
    threads = []
    for platform in platforms:
        t = threading.Thread(target=self.runBenchmark,
                             args=(info, platform, benchmarks, framework))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    shutil.rmtree(tempdir, True)

def _downloadRepoFile(self, location, tgt_dir, commit_hash):
    """
    location: //repo/fbsource/fbcode/aibench/...../a.py
    """
    raw_scm_query = pkg_resources.resource_string(
        "aibench", "benchmarking/bin/scm_query.par")
    query_exe = os.path.join(tgt_dir, "scm_query.par")
    with open(query_exe, "wb") as f:
        f.write(raw_scm_query)
    cmd = ['chmod', '+x', query_exe]
    subprocess.check_output(cmd)
    dirs = location[2:].split("/")
    tgt_file = os.path.join(tgt_dir, dirs[-1])
    cmd = [
        query_exe,
        '--repo', dirs[1],
        '--file_path', '/'.join(dirs[2:]),
        '--target_file', tgt_file,
        '--commit_hash', commit_hash
    ]
    getLogger().info("Downloading {}".format(location))
    subprocess.check_output(cmd)
    os.remove(query_exe)
    return tgt_file

def _updateDevices(self, result_dict):
    status = result_dict["status"]
    output = result_dict["output"]
    if status == 0:
        self.job["status"] = "DONE"
    elif status == 1:
        self.job["status"] = "USER_ERROR"
    else:
        self.job["status"] = "FAILED"
    if not output:
        getLogger().error("Error, output is None")
    else:
        outputs = output.split("\n")
        for o in outputs:
            getLogger().info(o)
        if sys.getsizeof(output) > LOG_LIMIT:
            getLogger().error("Error, output is too large")
            output = output[-LOG_LIMIT:]
        self.job["log"] = output
    device = self.devices[self.job["device"]][self.job["hash"]]
    device["output_dir"] = self.tempdir
    device["done_time"] = time.ctime()
    return device

def run(self):
    reboot = self.args.reboot and \
        (self.force_reboot or
         self.device["reboot_time"] + REBOOT_INTERVAL <
         datetime.datetime.now())
    assert self.device["available"] is False, \
        "The device to cool down should not be available"
    success = True
    if reboot:
        raw_args = []
        raw_args.extend(["--platform", self.args.platform])
        raw_args.extend(["--device", self.device["hash"]])
        raw_args.extend(["--android_dir", self.args.android_dir])
        reboot_device(raw_args=raw_args)
        time.sleep(120)
        self.device["reboot_time"] = datetime.datetime.now()
    if self.args.reboot:
        # for ios/android
        time.sleep(180)
    else:
        time.sleep(20)
    with LOCK:
        getLogger().debug("CoolDownDevice lock acquired")
        if success:
            self.device["available"] = True
        else:
            self.device["live"] = False
        device_str = getDevicesString([self.device])
        self.db.updateDevices(self.args.claimer_id, device_str, False)
        getLogger().debug("CoolDownDevice lock released")
    getLogger().info("Device {}({}) available".format(
        self.device["kind"], self.device["hash"]))

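# REBOOT_INTERVAL is defined elsewhere in this module. Given the datetime
# arithmetic above it must be a datetime.timedelta; a plausible (assumed)
# definition, matching the 8-hour reboot_time offset used when devices are
# first registered below:
import datetime

REBOOT_INTERVAL = datetime.timedelta(hours=8)  # assumed value
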
def _buildProgram(self, tempdir):
    # build binary
    platform = self.args.platform
    program = tempdir + "/program"
    if os.name == "nt":
        program = program + ".exe"
    elif platform.startswith("ios"):
        program = program + ".ipa"
    if self.prebuilt_binary:
        program = self.prebuilt_binary
    else:
        print("Building program...")
        success = buildProgramPlatform(program, self.args.repo_dir,
                                       self.args.framework,
                                       self.args.frameworks_dir,
                                       self.args.platform)
        if not success:
            return

    # upload all files under the program's directory
    filedir = os.path.dirname(program)
    allfiles = []
    if os.path.exists(filedir):
        if self.prebuilt_binary:
            allfiles = [program]
        else:
            allfiles = [
                os.path.join(filedir, f) for f in os.listdir(filedir)
            ]
        for fn in allfiles:
            filename, _ = self.file_handler.uploadFile(fn, None, None,
                                                       False)
            getLogger().info("program: {}".format(filename))
            self.filenames[os.path.basename(fn)] = filename
        # the main program needs to be in self.filenames["program"]
        self.filenames["program"] = \
            self.filenames[os.path.basename(program)]
    else:
        self.filenames["program"] = program

def _checkDevices(self):
    """Run any device health checks, e.g. connectivity, battery, etc."""
    try:
        online_hashes = getDeviceList(self.args, silent=True)
        offline_devices = [
            device for device in self.online_devices
            if device["hash"] not in online_hashes
        ]
        new_devices = [
            h for h in online_hashes
            if h not in [p["hash"] for p in self.online_devices]
        ]
        if offline_devices:
            for offline_device in offline_devices:
                if "rebooting" not in self.lab_devices[
                        offline_device["kind"]][offline_device["hash"]]:
                    getLogger().error(
                        "Device {} has become unavailable.".format(
                            offline_device))
                    self._disableDevice(offline_device)
                    # TODO: self._sendErrorReport()
        if new_devices:
            devices = ",".join(new_devices)
            devices = self._getDevices(devices)
            if devices:
                for d in devices:
                    self._enableDevice(d)
                    if d["hash"] not in [
                            device["hash"]
                            for device in self.online_devices
                    ]:
                        self.online_devices.append(d)
                        getLogger().info("New device added: {}".format(d))
    except BaseException as ex:
        getLogger().error("Error while checking devices. {}".format(ex))

def _runBenchmarks(self, jobs_queue):
    # run the jobs in the job queue
    run_ids = ",".join([str(job["id"]) for job in jobs_queue])
    self.db.runBenchmarks(self.args.claimer_id, run_ids)
    run_devices = [
        self.devices[job["device"]][job["hash"]] for job in jobs_queue
    ]
    self.db.updateDevices(self.args.claimer_id,
                          getDevicesString(run_devices), False)
    self._downloadFiles(jobs_queue)

    # run the benchmarks
    for job in jobs_queue:
        tempdir = tempfile.mkdtemp(prefix="aibench")
        raw_args = self._getRawArgs(job, tempdir)
        self.devices[job["device"]][job["hash"]]["start_time"] = \
            time.ctime()
        identifier = job["identifier"]
        getLogger().info(
            "Running job with identifier {}".format(identifier))
        async_runner = runAsync(self.args, self.devices, self.db, job,
                                tempdir)
        # The watchdog is used to kill currently running jobs
        # based on user requests.
        app = WatchDog(async_runner, async_runner.didUserRequestJobKill,
                       async_runner.killJob)
        global RUNNING_JOBS
        RUNNING_JOBS += 1

        # Python's multiprocessing needs to pickle objects to send them
        # to other processes, and bound methods are not picklable, so a
        # bound method cannot be passed to apply_async directly. Instead,
        # the class passed in implements __call__.
        # Ref: https://stackoverflow.com/a/6975654
        self.pool.apply_async(app, args=[raw_args],
                              callback=app.main.callback)

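# A self-contained sketch of the pattern the comment above references
# (hypothetical names, not part of aibench): an instance of a class that
# defines __call__ can be pickled and submitted to a Pool, where passing
# a bound method directly may not work.
import multiprocessing


class _Runner:
    """Picklable stand-in for the WatchDog wrapper: state plus __call__."""

    def __init__(self, offset):
        self.offset = offset

    def __call__(self, x):
        # executes in the worker process
        return x + self.offset


if __name__ == "__main__":
    with multiprocessing.Pool(processes=2) as pool:
        async_result = pool.apply_async(_Runner(10), args=[32])
        assert async_result.get() == 42
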
def startLogging(self):
    while self.running:
        if self.log and time.time() >= self.lastreq + self.interval:
            try:
                output = "\n".join(self.log)
                if sys.getsizeof(output) > LOG_LIMIT:
                    self.running = False
                    output = trimLog(output)
                else:
                    self.log = [output]
                status = self.db.updateLogBenchmarks(self.id, output)
                if status != 'success':
                    getLogger().error("Error updating logs.")
                    self.retries_left -= 1
                    if self.retries_left == 0:
                        self.running = False
                        getLogger().error(
                            "Max failed attempts reached for log updates. "
                            "Stopping log update requests.")
                else:
                    self.retries_left = self.retries
                self.lastreq = time.time()
            except Exception as ex:
                getLogger().error(ex)
                self.running = False
        time.sleep(1)

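# trimLog is referenced above but not defined in this section. A minimal
# sketch under the assumption that it simply keeps the tail of the log,
# mirroring the output[-LOG_LIMIT:] truncation used elsewhere in this
# harness (the real helper may differ, e.g. by inserting a marker):
LOG_LIMIT = 2 ** 20  # assumed byte budget for a single log payload


def trimLog(output, limit=LOG_LIMIT):
    """Keep only the most recent `limit` characters of the log."""
    return output[-limit:]
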
def downloadFile(self, location, md5):
    if location.startswith("http"):
        dirs = location.split(":/")
        replace_pattern = {
            ' ': '-',
            '\\': '-',
            ':': '/',
        }
        path = os.path.join(
            self.root_model_dir,
            getFilename(location, replace_pattern=replace_pattern))
    elif not location.startswith("//"):
        return
    else:
        dirs = location[2:].split("/")
        if len(dirs) <= 2:
            return
        path = self.root_model_dir + location[1:]
    if os.path.isfile(path):
        if md5:
            getLogger().info("Calculate md5 of {}".format(path))
            with open(path, 'rb') as f:
                file_hash = hashlib.md5()
                for chunk in iter(lambda: f.read(8192), b''):
                    file_hash.update(chunk)
            new_md5 = file_hash.hexdigest()
            del file_hash
            gc.collect()
            if md5 == new_md5:
                getLogger().info(
                    "File {} is cached, skip downloading".format(
                        os.path.basename(path)))
                return path
        else:
            # assume the file is the same
            return path
    downloader_controller = DownloadFile(dirs=dirs, logger=self.logger,
                                         args=self.args)
    downloader_controller.download_file(location, path)
    return path

def _downloadFiles(self, jobs_queue):
    for job in jobs_queue:
        job["models_location"] = []
        # download the models
        path = self._saveBenchmarks(job)
        location = self.benchmark_downloader.run(path)
        job["models_location"].extend(location)

        # download the programs
        if "info" not in job["benchmarks"]:
            continue
        try:
            if "treatment" not in job["benchmarks"]["info"]:
                getLogger().error('Field "treatment" must exist in '
                                  'job["benchmarks"]["info"]')
            elif "programs" not in job["benchmarks"]["info"]["treatment"]:
                getLogger().error(
                    'Field "programs" must exist in '
                    'job["benchmarks"]["info"]["treatment"]')
            else:
                treatment_info = job["benchmarks"]["info"]["treatment"]
                treatment_locations = self._downloadBinaries(
                    treatment_info)
                job["programs_location"] = treatment_locations
                if "control" in job["benchmarks"]["info"]:
                    if "programs" not in \
                            job["benchmarks"]["info"]["control"]:
                        getLogger().error(
                            'Field "programs" must exist in '
                            'job["benchmarks"]["info"]["control"]')
                    else:
                        control_info = \
                            job["benchmarks"]["info"]["control"]
                        control_locations = self._downloadBinaries(
                            control_info)
                        job["programs_location"].extend(control_locations)
        except Exception:
            getLogger().error("Unknown exception {}".format(
                sys.exc_info()[0]))
            getLogger().error("File download failure")

def _getStatistics(array, stats=_default_statistics):
    if len(array) == 0:
        return {}
    if "p50" not in stats:
        # always include p50 since it is needed for internal calculations;
        # copy instead of append so the shared default list is not mutated
        stats = stats + ["p50"]
    sorted_array = sorted(array)
    median = _getMedian(sorted_array)
    mean = _getMean(array)
    stdev = _getStdev(array, mean)
    meta_values = {
        "mean": mean,
        "p50": median,  # special case for even-numbered arrays
        "stdev": stdev,
        "MAD": _getMedian(sorted(map(lambda x: abs(x - median),
                                     sorted_array))),
        "cv": stdev / mean if mean != 0 else None,
    }
    results = {}
    for stat in stats:
        if stat in meta_values:
            results[stat] = meta_values[stat]
        else:
            percentile_arg_value = _percentileArgVal(stat)  # parses p0-p100
            if percentile_arg_value is None:
                getLogger().error(
                    f"Unsupported custom statistic '{stat}' ignored.")
                assert percentile_arg_value is not None, \
                    f"Unsupported custom statistic '{stat}'."
            else:
                results[stat] = _getPercentile(sorted_array,
                                               percentile_arg_value)
    return results

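# _percentileArgVal and _getPercentile are used above but not shown. A
# minimal sketch of plausible implementations, assuming "p0".."p100"
# names and nearest-rank selection on a pre-sorted list (the real helpers
# may interpolate differently):
def _percentileArgVal(stat):
    """Return 50 for "p50", 95 for "p95", etc.; None if not a percentile."""
    if not stat.startswith("p"):
        return None
    try:
        value = int(stat[1:])
    except ValueError:
        return None
    return value if 0 <= value <= 100 else None


def _getPercentile(sorted_array, percentile):
    """Nearest-rank percentile of a pre-sorted list."""
    idx = min(len(sorted_array) - 1,
              int(len(sorted_array) * percentile / 100.0))
    return sorted_array[idx]
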
def _saveBenchmarks(self, jobs_queue):
    benchmark_files = []
    # save benchmarks to files
    for job in jobs_queue:
        benchmarks = job["benchmarks"]
        benchmark = benchmarks["benchmark"]
        content = benchmark["content"]
        benchmark_str = json.dumps(content)
        outfd, path = tempfile.mkstemp()
        with os.fdopen(outfd, "w") as f:
            f.write(benchmark_str)
        job["benchmarks"]["benchmark"]["content"] = path
        if content["tests"][0]["metric"] == "generic":
            job["framework"] = "generic"
        elif "model" in content and "framework" in content["model"]:
            job["framework"] = content["model"]["framework"]
        else:
            getLogger().error("Framework is not specified, "
                              "using caffe2 as the default")
            job["framework"] = "caffe2"
        benchmark_files.append(path)
    return benchmark_files

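# For reference, the benchmark "content" shape that the branching above
# relies on (a made-up minimal example; only the keys actually read are
# shown):
example_content = {
    "tests": [{"metric": "delay"}],     # "generic" selects the generic runner
    "model": {"framework": "pytorch"},  # otherwise the framework comes from here
}
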
def report(self, content): data = content[self.DATA] if data is None or len(data) == 0: getLogger().info("No data to write") return meta = content[self.META] net_name = meta['net_name'] netdir = getFilename(net_name) platform_name = meta[self.PLATFORM] platformdir = getFilename(platform_name) framework_name = meta["framework"] frameworkdir = getFilename(framework_name) metric_name = meta['metric'] metric_dir = getFilename(metric_name) id_dir = getFilename(meta["identifier"]) ts = float(meta['commit_time']) commit = meta['commit'] datedir = getDirectory(commit, ts) dirname = os.path.join(getArgs().local_reporter, platformdir, frameworkdir, netdir, metric_dir, id_dir, datedir) i = 0 while os.path.exists(os.path.join(dirname, str(i))): i = i + 1 dirname = os.path.join(dirname, str(i)) os.makedirs(dirname) for d in data: filename = os.path.join(dirname, getFilename(d) + ".txt") content_d = json.dumps(data[d], indent=2, sort_keys=True) with open(filename, 'w') as file: file.write(content_d) filename = os.path.join(dirname, getFilename(self.META) + ".txt") with open(filename, 'w') as file: content_meta = json.dumps(meta, indent=2, sort_keys=True) file.write(content_meta) pname = platform_name if "platform_hash" in meta: pname = pname + " ({})".format(meta["platform_hash"]) getLogger().info("Writing file for {}: {}".format(pname, dirname))
def run(self):
    tempdir = tempfile.mkdtemp(prefix="aibench")
    getLogger().info("Temp directory: {}".format(tempdir))
    info = self._getInfo()
    frameworks = getFrameworks()
    assert self.args.framework in frameworks, \
        "Framework {} is not supported".format(self.args.framework)
    framework = frameworks[self.args.framework](tempdir, self.args)
    bcollector = BenchmarkCollector(framework, self.args.model_cache,
                                    args=self.args)
    benchmarks = bcollector.collectBenchmarks(info,
                                              self.args.benchmark_file)
    platforms = getPlatforms(tempdir, self.args)
    threads = []
    for platform in platforms:
        t = threading.Thread(target=self.runBenchmark,
                             args=(info, platform, benchmarks))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    if not self.args.debug:
        shutil.rmtree(tempdir, True)
    status = self.status | getRunStatus()
    if getRunKilled():
        status_str = "killed"
    elif status == 0:
        status_str = "success"
    elif status == 1:
        status_str = "user error"
    elif status == 2:
        status_str = "harness error"
    else:
        status_str = "user and harness error"
    getLogger().info(" ======= {} =======".format(status_str))
    return status if not getRunKilled() else RUN_KILLED

def run(self, raw_args):
    log_capture_string = StringIO()
    ch = logging.StreamHandler(log_capture_string)
    ch.setLevel(logging.DEBUG)
    getLogger().addHandler(ch)
    # verify download success before run
    if "download_error_log" in self.job:
        getLogger().error("Error downloading files for job. Skipping run.")
        status = USER_ERROR
        output = self.job["download_error_log"]
    else:
        try:
            app = BenchmarkDriver(raw_args=raw_args)
            getLogger().debug("RunBenchmark")
            status = app.run()
        except Exception as e:
            getLogger().error(e)
            # without this, status would be unbound on the exception path;
            # 2 maps to "harness error"/FAILED elsewhere in this harness
            status = 2
        finally:
            output = log_capture_string.getvalue()
            log_capture_string.close()
            getLogger().handlers.pop()
    return {"status": status, "output": output}

def collect(self, data, args=None):
    rows = self._prepareData(data)
    results = []
    valid_run_idxs = []
    for row in rows:
        try:
            result = json.loads(row)
            if ("type" in result and result["type"] == "NET"
                    and "value" in result) \
                    or ("NET" in result):  # for backward compatibility
                valid_run_idxs.append(len(results))
            results.append(result)
        except Exception as e:
            # bypass one line
            getLogger().info("Skip one row %s \n Exception: %s" %
                             (row, str(e)))
    if len(valid_run_idxs) > 0:
        # strip data not yet in a valid range; here it is assumed the
        # NET metric appears earlier than other metrics
        results = results[valid_run_idxs[0]:]
    return results, valid_run_idxs

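# For illustration, the kinds of rows collect() above handles (made-up
# values): newer benchmark output emits one JSON object per line with
# "type"/"value", while older output used "NET" as a key directly.
example_rows = [
    '{"type": "NET", "value": 12.3}',   # new format: marks a valid run
    '{"NET": 12.3}',                    # old format, kept for compatibility
    '{"type": "layer", "value": 1.0}',  # parsed, but not a NET marker
    'not json',                         # skipped with a logged message
]
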
def __init__(self, raw_args=None):
    self.args, self.unknowns = parser.parse_known_args(raw_args)
    self.benchmark_downloader = DownloadBenchmarks(self.args, getLogger())
    self.adb = ADB(None, self.args.android_dir)
    devices = self._getDevices()
    setLoggerLevel(self.args.logger_level)
    if not self.args.benchmark_db_entry:
        assert self.args.server_addr is not None, \
            "Either server_addr or benchmark_db_entry must be specified"
        while self.args.server_addr[-1] == '/':
            self.args.server_addr = self.args.server_addr[:-1]
        self.args.benchmark_db_entry = \
            self.args.server_addr + "/benchmark/"
    self.db = DBDriver(self.args.benchmark_db, self.args.app_id,
                       self.args.token, self.args.benchmark_table,
                       self.args.job_queue, self.args.test,
                       self.args.benchmark_db_entry)
    self.devices = {}
    for k in devices:
        kind = k["kind"]
        hash = k["hash"]
        entry = {
            "kind": kind,
            "hash": hash,
            "available": True,
            "live": True,
            "start_time": None,
            "done_time": None,
            "output_dir": None,
            "job": None,
            "adb": ADB(hash, self.args.android_dir),
            "reboot_time": datetime.datetime.now() -
                           datetime.timedelta(hours=8),
        }
        if kind not in self.devices:
            self.devices[kind] = {}
        assert hash not in self.devices[kind], \
            "Device {} ({}) is attached twice.".format(kind, hash)
        self.devices[kind][hash] = entry
    dvs = [
        self.devices[k][h] for k in self.devices for h in self.devices[k]
    ]
    self.db.updateDevices(self.args.claimer_id, getDevicesString(dvs),
                          True)
    if self.args.platform.startswith("host"):
        numProcesses = 2
    else:
        numProcesses = multiprocessing.cpu_count() - 1
    self.pool = multiprocessing.Pool(processes=numProcesses)

def downloadFile(self, location, md5): if location.startswith("http"): dirs = location.split(":/") replace_pattern = { ' ': '-', '\\': '-', ':': '/', } path = os.path.join( self.root_model_dir, getFilename(location, replace_pattern=replace_pattern)) elif not location.startswith("//"): return else: dirs = location[2:].split("/") if len(dirs) <= 2: return path = self.root_model_dir + location[1:] if os.path.isfile(path): if md5: m = hashlib.md5() fo = open(path, 'rb') m.update(fo.read()) new_md5 = m.hexdigest() fo.close() if md5 == new_md5: getLogger().info("File {}".format(os.path.basename(path)) + " is cached, skip downloading") return path else: # assume the file is the same return path downloader_controller = DownloadFile(dirs=dirs, logger=self.logger, args=self.args) downloader_controller.download_file(location, path) return path
def runOnPlatform(self, total_num, cmd, platform, platform_args,
                  converter_class):
    if converter_class is None:
        converter_class = \
            self.converters["json_with_identifier_converter"]
    converter = converter_class()
    results = []
    num = 0
    # emulate a do...while... loop
    while True:
        output = platform.runBenchmark(cmd, platform_args=platform_args)
        one_result, valid_run_idxs = \
            converter.collect(output, identifier=self.IDENTIFIER)
        valid_run_idxs = [num + idx for idx in valid_run_idxs]
        num += len(valid_run_idxs)
        results.extend(one_result)
        if num < total_num:
            num_items = len(valid_run_idxs)
            if num_items > 0:
                getLogger().info("%d items collected, still missing %d "
                                 "runs. Collecting again." %
                                 (num_items, total_num - num))
                continue
            else:
                getLogger().info("No new items collected, "
                                 "finish collecting...")
        elif total_num >= 0 and num > total_num:
            # If more entries than needed were collected, keep the latest
            # ones. This may happen when data from previous runs was not
            # cleared (e.g. on some Android 5 devices), or when multiple
            # runs are needed to collect the desired number of iterations.
            results = results[valid_run_idxs[num - total_num]:]
        break
    metric = converter.convert(results)
    return metric

def report(self, content): data = copy.deepcopy(content[self.DATA]) if data is None or len(data) == 0: getLogger().info("No data to write") return meta = content[self.META] net_name = meta["net_name"] platform_name = meta[self.PLATFORM] framework_name = meta["framework"] metric_name = meta["metric"] ts = float(meta["commit_time"]) commit = meta["commit"] print("NET: {}\tMETRIC: {}\tID: {}".format(net_name, metric_name, meta["identifier"])) if "platform_hash" in meta: print("PLATFORM: {}\tHASH: {}".format(platform_name, meta["platform_hash"])) else: print("PLATFORM: {}".format(platform_name)) print("FRAMEWORK: {}\tCOMMIT: {}\tTIME: {}".format( framework_name, commit, datetime.datetime.fromtimestamp( int(ts)).strftime("%Y-%m-%d %H:%M:%S"), )) del_keys = [] for key in data: if key.startswith("NET"): self._printOneData(key, data[key]) del_keys.append(key) for key in del_keys: data.pop(key) for key in sorted(data): self._printOneData(key, data[key])
def callback(self, future_result_dict):
    """Decrement the running jobs count, output the job log, and start
    device cooldown."""
    global RUNNING_JOBS
    RUNNING_JOBS -= 1
    result = future_result_dict.result()
    job = result["job"]
    device = result["device"]
    device = self.devices[device["kind"]][device["hash"]]
    # output the benchmark log in the main thread
    getLogger().info(
        "\n{}\n\nBenchmark:\t\t{}\nJob:\t\t\t{}\n"
        "Device Kind:\t\t{}\nDevice Hash:\t\t{}\n{}\n\n{}".format(
            "#" * 80,
            job["identifier"],
            job["id"],
            device["kind"],
            device["hash"],
            job["log"],
            "#" * 80,
        ))
    with LOCK:
        self._coolDown(device, force_reboot=job["status"] != "DONE")

def __init__(self, tempdir, tgt_dir, platform_util,
             hash_platform_mapping):
    self.tempdir = tempdir
    self.platform = None
    self.platform_hash = platform_util.device
    self.type = None
    self.util = platform_util
    self.tgt_dir = tgt_dir
    self.hash_platform_mapping = None
    if isinstance(hash_platform_mapping, string_types):
        # if the user provides a filename, load the mapping from it
        try:
            with open(hash_platform_mapping) as f:
                self.hash_platform_mapping = json.load(f)
        except OSError as e:
            getLogger().info("OSError: {}".format(e))
        except ValueError as e:
            getLogger().info("Invalid json: {}".format(e))
    else:
        # otherwise read the internal mapping
        try:
            from aibench.specifications.hash_platform_mapping \
                import hash_platform_mapping
            self.hash_platform_mapping = hash_platform_mapping
        except BaseException:
            pass

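# A minimal sketch of feeding the hash-to-platform mapping in as a file.
# The device hash and platform name are made up for illustration; the
# internal mapping ships in aibench.specifications.
import json
import tempfile

example_mapping = {
    "0123456789abcdef": "SM-G950U-7.0-24",  # device hash -> platform name
}
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(example_mapping, f)
# Passing f.name as hash_platform_mapping makes the constructor above
# json.load this file into self.hash_platform_mapping.
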
def getPlatforms(tempdir):
    platforms = []
    if getArgs().platform[0:4] == "host" or \
            getArgs().platform[0:5] == "linux" or \
            getArgs().platform[0:3] == "mac":
        platforms.append(HostPlatform(tempdir))
    elif getArgs().platform[0:7] == "android":
        driver = AndroidDriver()
        platforms.extend(driver.getAndroidPlatforms(tempdir))
        if getArgs().excluded_devices:
            excluded_devices = \
                getArgs().excluded_devices.strip().split(',')
            platforms = [
                p for p in platforms
                if p.platform not in excluded_devices and
                (p.platform_hash is None or
                 p.platform_hash not in excluded_devices)
            ]
        if getArgs().devices:
            plts = getArgs().devices.strip().split(',')
            platforms = [
                p for p in platforms
                if p.platform in plts or p.platform_hash in plts
            ]
    if not platforms:
        getLogger().error("No platform or physical device detected.")
    return platforms

def report(self, content): data = content[self.DATA] if data is None or len(data) == 0: getLogger().info("No data to write") return meta = content[self.META] dirname = None if "identifier" in meta: id_dir = getFilename(meta["identifier"]) dirname = os.path.join(getArgs().simple_local_reporter, id_dir) else: dirname = tempfile.mkdtemp(dir=getArgs().simple_local_reporter) if os.path.exists(dirname): shutil.rmtree(dirname, True) os.makedirs(dirname) with open(os.path.join(dirname, "data.txt"), 'w') as file: content_d = json.dumps(data) file.write(content_d) pname = meta[self.PLATFORM] if "platform_hash" in meta: pname = pname + " ({})".format(meta["platform_hash"]) getLogger().info("Writing file for {}: {}".format(pname, dirname))
def downloadFile(self, location, md5):
    if location[0:2] != "//":
        return
    dirs = location[2:].split("/")
    if len(dirs) <= 2:
        return
    path = self.root_model_dir + location[1:]
    if os.path.isfile(path):
        if md5:
            m = hashlib.md5()
            with open(path, 'rb') as f:
                m.update(f.read())
            new_md5 = m.hexdigest()
            if md5 == new_md5:
                getLogger().info(
                    "File {} is cached, skip downloading".format(
                        os.path.basename(path)))
                return
        else:
            # assume the file is the same
            return
    downloader_controller = DownloadFile(dirs=dirs, logger=self.logger,
                                         args=self.args)
    downloader_controller.download_file(location, path)

def report(self, content): data = copy.deepcopy(content[self.DATA]) if data is None or len(data) == 0: getLogger().info("No data to write") return meta = content[self.META] net_name = meta['net_name'] platform_name = meta[self.PLATFORM] framework_name = meta["framework"] metric_name = meta['metric'] ts = float(meta['commit_time']) commit = meta['commit'] print("NET: {}\tMETRIC: {}\tID: {}".format(net_name, metric_name, meta["identifier"])) if "platform_hash" in meta: print("PLATFORM: {}\tHASH: {}".format(platform_name, meta["platform_hash"])) else: print("PLATFORM: {}".format(platform_name)) print("FRAMEWORK: {}\tCOMMIT: {}\tTIME: {}". format(framework_name, commit, datetime.datetime.fromtimestamp( int(ts)).strftime('%Y-%m-%d %H:%M:%S'))) if "NET_DELAY" in data: self._printOneData("NET_DELAY", data["NET_DELAY"]) data.pop("NET_DELAY") data_values_iter = iter(data.values()) if "id" in next(data_values_iter): # Print per layer delay in order for key in sorted(data, key=lambda x: int(data[x]["id"][0])): self._printOneData(key, data[key]) else: for key in sorted(data): self._printOneData(key, data[key])
def _copyFile(self, field, destination_name, source):
    if "location" not in field:
        return False
    location = field["location"]
    if location[0:4] == "http":
        getLogger().info("Downloading {}".format(location))
        r = requests.get(location)
        if r.status_code == 200:
            with open(destination_name, 'wb') as f:
                f.write(r.content)
    else:
        abs_name = self._getAbsFilename(field, source, None)
        shutil.copyfile(abs_name, destination_name)
    assert os.path.isfile(destination_name), \
        "File {} cannot be retrieved".format(destination_name)
    # verify the md5 of the retrieved file matches the recorded one
    md5 = self._calculateMD5(destination_name)
    if md5 != field["md5"]:
        getLogger().info("Source file {} has changed, updating MD5. "
                         "Please commit the updated json file."
                         .format(location))
        field["md5"] = md5
        return True
    return False

def run(self, raw_args):
    log_capture_string = StringIO()
    ch = logging.StreamHandler(log_capture_string)
    ch.setLevel(logging.DEBUG)
    getLogger().addHandler(ch)
    try:
        app = BenchmarkDriver(raw_args=raw_args)
        status = app.run()
    except Exception as e:
        getLogger().error(e)
        # without this, status would be unbound on the exception path;
        # 2 maps to "harness error"/FAILED elsewhere in this harness
        status = 2
    output = log_capture_string.getvalue()
    log_capture_string.close()
    getLogger().handlers.pop()
    getLogger().debug("RunBenchmark")
    return {"status": status, "output": output}

def callback(self, result_dict):
    global RUNNING_JOBS
    try:
        with LOCK:
            device = self._updateDevices(result_dict)
            self._submitDone(device)
            if self.args.platform.startswith("host"):
                self._removeBenchmarkFiles(device)
            self._coolDown(device)
            RUNNING_JOBS -= 1
        time.sleep(1)
    except Exception as e:
        getLogger().error("Encountered fatal error in benchmark callback")
        getLogger().error(e)
        getLogger().error(
            "Benchmark submission and device release might have partially "
            "completed, leaving aibench in a broken state")
        getLogger().error("Terminating...")
        os._exit(1)

def _retrievePowerData(power_data, start_idx, end_idx, num_iters):
    data = {}
    if start_idx < 0 or end_idx < 0:
        return data

    # Get the base current. It is just an approximation.
    THRESHOLD = 150
    num = len(power_data)
    current_sum = 0
    count = 0
    for i in range(end_idx, num):
        if power_data[i]["current"] < THRESHOLD:
            current_sum += power_data[i]["current"]
            count += 1
    base_current = current_sum / count if count > 0 else 0

    energy = 0
    prev_time = power_data[start_idx - 1]["time"]
    for i in range(start_idx, end_idx):
        entry = power_data[i]
        curr_time = entry["time"]
        energy += entry["voltage"] * \
            (entry["current"] - base_current) * (curr_time - prev_time)
        prev_time = curr_time

    total_time = power_data[end_idx]["time"] - \
        power_data[start_idx]["time"]
    power = energy / total_time
    energy_per_inference = energy / num_iters
    latency = total_time * 1000 * 1000 / num_iters
    data["energy"] = _composeStructuredData(energy_per_inference,
                                            "energy", "mJ")
    data["power"] = _composeStructuredData(power, "power", "mW")
    data["latency"] = _composeStructuredData(latency, "latency", "uS")
    getLogger().info("Base current: {} mA".format(base_current))
    getLogger().info(
        "Energy per inference: {} mJ".format(energy_per_inference))
    getLogger().info("Power: {} mW".format(power))
    getLogger().info("Latency per inference: {} uS".format(latency))
    return data

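# A small worked example of the integration above, with a made-up trace
# (time in s, voltage in V, current in mA; units are assumptions based on
# the mJ/mW log messages). Each sample contributes
# voltage * (current - base_current) * dt, a rectangle rule over the
# sampling intervals.
example_power_data = [
    {"time": 0.0, "voltage": 4.0, "current": 100},  # idle
    {"time": 0.1, "voltage": 4.0, "current": 500},  # benchmark running
    {"time": 0.2, "voltage": 4.0, "current": 500},
    {"time": 0.3, "voltage": 4.0, "current": 100},  # idle tail -> base current
]
# With start_idx=1, end_idx=3: base_current = 100 (the only sample past
# end_idx is below THRESHOLD), so
#   energy     = 4.0 * 400 * 0.1 + 4.0 * 400 * 0.1 = 320
#   total_time = 0.3 - 0.1 = 0.2, hence power = 320 / 0.2 = 1600
#   with num_iters = 2: energy_per_inference = 160, latency = 100000 uS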