def runThroughput(modelName, net, params):
    """Measure batched inference throughput for *modelName* on *net*.

    Preprocesses one full batch, runs a single untimed-loop warmup
    prediction, then times ``params[const.RUNITERATIONS]`` predictions.

    Returns:
        list of float seconds shaped
        ``[total_loop_wall_time, warmup_time, iter_1, ..., iter_N]``.
    """
    log.info('Running prediction...')
    batch = params[const.BATCH]
    fns = getter.apiGetTestInputs(modelName, batch)

    # Load the entire batch into system/accelerator memory for iterating.
    # (Removed unused accumulator `tsum` from the original.)
    imgs = [preprocessor.apiProcess(modelName, fns.pop()) for _ in range(batch)]
    # TPU takes different format, unlike others: a flat 1-D array.
    imgs = np.array(imgs).flatten()

    times = []
    # Allow one warmup prediction outside the overall timing loop.
    t0 = time.time()
    net.predict(imgs)
    times.append(time.time() - t0)

    # Overall timing loop for throughput; each iteration is also timed
    # individually so callers can derive per-call latency.
    t_start = time.time()
    for _ in range(params[const.RUNITERATIONS]):
        t0 = time.time()
        net.predict(imgs)
        times.append(time.time() - t0)
    t_finish = time.time()

    # Schema: element 0 is the total wall time of the timing loop.
    times.insert(0, t_finish - t_start)
    return times
def runThroughput(modelName, net, params):
    """Measure batched inference throughput using the backend's own timer.

    Preprocesses one full batch at the precision requested in
    ``params[const.PRECISION]``, runs one warmup prediction, then runs
    ``params[const.RUNITERATIONS]`` timed predictions.

    Returns:
        list of float seconds shaped
        ``[total_loop_wall_time, warmup_time, iter_1, ..., iter_N]``,
        where per-iteration times come from ``net.predict_runtime``.

    Raises:
        ValueError: if the requested precision is neither FP32 nor INT8.
            (The original fell through to a confusing NameError here.)
    """
    log.info('Running prediction...')
    batch = params[const.BATCH]
    fns = getter.apiGetTestInputs(modelName, batch)

    # Load the entire batch into system/accelerator memory for iterating.
    precision = params[const.PRECISION]
    imgs = []
    for _ in range(batch):
        if precision == const.FP32:
            img = preprocessor.apiProcess(modelName, fns.pop())
        elif precision == const.INT8:
            img = preprocessor.apiProcess_int8(modelName, fns.pop())
        else:
            raise ValueError('Unsupported precision: %s' % precision)
        imgs.append(img)

    times = []
    # Allow one warmup prediction outside the overall timing loop.
    times.append(net.predict_runtime(imgs, params))

    # Overall timing loop for throughput.
    t_start = time.time()
    for _ in range(params[const.RUNITERATIONS]):
        # individual timing for latency; we don't support concurrency > 1
        times.append(net.predict_runtime(imgs, params))
    t_finish = time.time()

    # Schema: element 0 is the total wall time of the timing loop.
    times.insert(0, t_finish - t_start)
    return times
def runLatency(modelName, net, params):
    """Measure single-image inference latency for *modelName* on *net*.

    Preprocesses one test input and times ``params[const.RUNITERATIONS]``
    individual predictions on it.

    Returns:
        list of float seconds, one entry per iteration.
    """
    log.info('Running prediction...')
    fns = getter.apiGetTestInputs(modelName, 1)
    img = preprocessor.apiProcess(modelName, fns[0])

    times = []
    for _ in range(params[const.RUNITERATIONS]):
        t0 = time.time()
        # Prediction result was bound to an unused local in the original;
        # only the elapsed time matters here.
        net.predict(img)
        t = time.time() - t0
        log.debug('%f sec' % t)
        times.append(t)
    return times
def runThroughput(modelName, net, params):
    """Measure batched inference throughput via the native predictor.

    Preprocesses one full batch and hands it to ``net.predict``, which
    runs the timing loop itself and reports per-iteration timings.

    Returns:
        list of float seconds shaped
        ``[total_time_s, time0, time1, ..., timeN]`` (the predictor's
        ``'seconds'`` result field).
    """
    log.info('Running prediction...')
    batch = params[const.BATCH]
    fns = getter.apiGetTestInputs(modelName, batch)

    # Load the entire batch into system/accelerator memory for iterating.
    # (Removed unused `tsum` and a commented-out summation from the original.)
    imgs_np = numpy.asarray(
        [preprocessor.apiProcess(modelName, fns[i]) for i in range(batch)])

    # NOTE(review): the positional arguments 5 and 0 are opaque here —
    # presumably top-k and a flag/offset; confirm against net.predict's API.
    results = net.predict(fns[0], imgs_np, 5, 0, params[const.RUNITERATIONS])

    # New schema is [ total_time_s, time0, time1, ..., timeN ]
    return results['seconds']
def run(modelName, modelFileName, params):
    """Run inference for *modelName* through the external C++ executable.

    Serializes the request (image filenames, frozen-graph path, params)
    to a JSON file in a temp directory, invokes
    ``<commonDir>/../<modelName>/<modelName>.exe <inputs> <outputs>``,
    and parses the JSON results it writes.

    Returns:
        In accuracy mode, the executable's ``'predictions'`` list;
        otherwise its ``'times'`` list. ``None`` if the subprocess
        exits non-zero.
    """
    # TODO careful this over-writes (not appends to) original
    accelDir = 'GpuAcc' if params[const.HARDWARE] == 'gpu' else 'CpuAcc'
    os.environ['LD_LIBRARY_PATH'] = (
        commonDir + ':' + os.path.join(commonDir, accelDir))

    # Use a context manager so the temp directory is always removed;
    # the original mkdtemp() leaked one directory per call.
    with tempfile.TemporaryDirectory() as tmpdir:
        inputFn = os.path.join(tmpdir, 'inputs.json')
        outputFn = os.path.join(tmpdir, 'outputs.json')

        if params[const.MODE] == const.ACCURACY:
            imageFileNames = getter.apiGetValidationInputs(modelName, cache=True)
        else:
            imageFileNames = getter.apiGetTestInputs(
                modelName, params[const.BATCH], cache=True)

        cxxParams = {
            'images': imageFileNames,  # Note: this can be up to 5,000 filenames
            'model': os.path.join(paths.MODELS, 'tensorflow', modelName,
                                  'frozen_graph.pb'),
            'params': params,
        }
        with open(inputFn, 'w') as fp:
            json.dump(cxxParams, fp)

        exeCmd = os.path.join(commonDir, '..', modelName, modelName + '.exe')
        cmd = [exeCmd, inputFn, outputFn]
        log.info('Running prediction...')
        log.debug(cmd)
        ret = subprocess.call(cmd)
        if ret:
            log.error('Inference failed')
            return None

        log.info('Loading results file %s' % outputFn)
        with open(outputFn) as fp:
            returnData = json.load(fp)

    if params[const.MODE] == const.ACCURACY:
        return returnData['predictions']
    return returnData['times']
def runThroughput(modelName, net, params):
    """Run the throughput benchmark.

    Fetches a full test batch and delegates both prediction and timing
    to ``net.predict``, returning whatever timing list it produces.
    """
    log.info('Running throughput...')
    batch_files = getter.apiGetTestInputs(
        modelName, params[const.BATCH], cache=True)
    return net.predict(batch_files, params[const.RUNITERATIONS])
def runLatency(modelName, net, params):
    """Run the latency benchmark.

    Fetches a single test input and delegates both prediction and timing
    to ``net.predict``, returning whatever timing list it produces.
    """
    log.info('Running latency...')
    single_input = getter.apiGetTestInputs(modelName, 1, cache=True)
    return net.predict(single_input, params[const.RUNITERATIONS])