def inference(bmodel_path, input_path, loops, tpu_id, compare_path):
    """Load a bmodel and run detection on frames read from a video.

    Args:
        bmodel_path: Path to bmodel
        input_path: Path to input file
        loops: Number of loops to run
        tpu_id: ID of TPU to use
        compare_path: Path to correct result file

    Returns:
        True for success and False for failure
    """
    # set configurations
    load_from_file = True
    detected_size = (416, 416)
    threshold = 0.5

    cap = cv2.VideoCapture(input_path)
    try:
        # init Engine and load bmodel
        if load_from_file:
            # load bmodel from file
            net = sail.Engine(bmodel_path, tpu_id, sail.IOMode.SYSIO)
        else:
            # simulate load bmodel from memory; the context manager closes
            # the file even if reading fails
            with open(file=bmodel_path, mode='rb') as bmodel_file:
                bmodel = bmodel_file.read()
            net = sail.Engine(bmodel, len(bmodel), tpu_id, sail.IOMode.SYSIO)

        # get model info
        graph_name = net.get_graph_names()[0]
        input_name = net.get_input_names(graph_name)[0]
        reference = get_reference(compare_path)

        message = "[Frame {} on tpu {}] Category: {}, Score: {:.3f}, Box: {}"
        status = True
        # pipeline of inference
        for i in range(loops):
            # read an image
            ret, img = cap.read()
            if not ret:
                print("Finished to read the video!")
                break
            # preprocess
            data = preprocess(img, detected_size)
            input_data = {input_name: np.array([data], dtype=np.float32)}
            output = net.process(graph_name, input_data)
            # postprocess
            bboxes, classes, probs = postprocess(output, img, detected_size,
                                                 threshold)
            # a mismatch against the reference stops the run as a failure
            if not compare(reference, bboxes, classes, probs, i):
                status = False
                break
            # print result
            for bbox, cls, prob in zip(bboxes, classes, probs):
                print(message.format(i + 1, tpu_id, cls, prob, bbox))
        return status
    finally:
        # release the capture on every exit path, including exceptions
        cap.release()
def main():
    """Run inference of one model on multiple TPUs, one thread per TPU.

    One engine is created for every entry of ``ARGS.tpu_id``; each engine
    loads the bmodel and allocates its input and output tensors. The process
    exits with 0 when every slot of the shared status list is truthy after
    all threads have joined, and with -1 otherwise.
    """
    tpu_ids = ARGS.tpu_id
    worker_count = len(tpu_ids)

    # one engine for one TPU
    engines = [sail.Engine(ARGS.bmodel, tpu, sail.SYSIO) for tpu in tpu_ids]

    # one status slot per thread, handed to thread_infer together with its index
    status = [None] * worker_count
    workers = [
        threading.Thread(
            target=thread_infer,
            args=(idx, engine, ARGS.input, ARGS.loops, ARGS.compare, status))
        for idx, engine in enumerate(engines)
    ]

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # check status
    sys.exit(0 if all(status) else -1)
def main():
    """Run inference of one model by multiple threads on one TPU.

    A single engine is created for ``ARGS.tpu_id`` and shared by
    ``ARGS.threads`` threads. The process exits with 0 when every slot of
    the shared status list is truthy after all threads have joined, and
    with -1 otherwise.
    """
    # init Engine
    engine = sail.Engine(ARGS.tpu_id)
    # load bmodel without builtin input and output tensors;
    # each thread manages its own input and output tensors
    engine.load(ARGS.bmodel)

    worker_count = int(ARGS.threads)
    status = [None] * worker_count
    workers = [
        threading.Thread(
            target=thread_infer,
            args=(idx, engine, ARGS.input, ARGS.loops, ARGS.compare, status))
        for idx in range(worker_count)
    ]

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # check status
    sys.exit(0 if all(status) else -1)
def __init__(self, bmodel_path, tpu_id):
    """Load a bmodel on one TPU and prepare tensors, bmcv and post-processing.

    Everything is stored on the ``Net`` class itself, not on the instance,
    so the state is shared by all ``Net`` objects.

    Args:
        bmodel_path: Path to bmodel
        tpu_id: ID of TPU to use
    """
    # init Engine
    Net.engine_ = sail.Engine(tpu_id)
    # load bmodel without builtin input and output tensors
    Net.engine_.load(bmodel_path)

    # get model info; only the first graph of the engine is used
    Net.handle_ = Net.engine_.get_handle()
    Net.graph_name_ = Net.engine_.get_graph_names()[0]
    input_names = Net.engine_.get_input_names(Net.graph_name_)
    input_dtype = 0
    Net.tpu_id_ = tpu_id
    Net.input_name_ = input_names[0]

    # one tensor per input, created with both trailing flags False
    for name in input_names:
        Net.input_shapes_[name] = Net.engine_.get_input_shape(
            Net.graph_name_, name)
        input_dtype = Net.engine_.get_input_dtype(Net.graph_name_, name)
        input_tensor = sail.Tensor(Net.handle_, Net.input_shapes_[name],
                                   input_dtype, False, False)
        Net.input_tensors_[name] = input_tensor
        Net.input_dtype_ = input_dtype

    # one tensor per output, created with both trailing flags True
    Net.output_names_ = Net.engine_.get_output_names(Net.graph_name_)
    for name in Net.output_names_:
        Net.output_shapes_[name] = Net.engine_.get_output_shape(
            Net.graph_name_, name)
        output_dtype = Net.engine_.get_output_dtype(Net.graph_name_, name)
        print(Net.output_shapes_[name])
        output_tensor = sail.Tensor(Net.handle_, Net.output_shapes_[name],
                                    output_dtype, True, True)
        Net.output_tensors_[name] = output_tensor
        # flatten the first four dimensions of every output shape into one list
        for j in range(4):
            Net.output_shapes_array_.append(Net.output_shapes_[name][j])
    print(Net.input_shapes_)
    print(Net.output_shapes_)

    # set io_mode
    Net.engine_.set_io_mode(Net.graph_name_, sail.IOMode.SYSIO)
    Net.bmcv_ = sail.Bmcv(Net.handle_)
    # uses the dtype of the last input queried above
    Net.img_dtype_ = Net.bmcv_.get_bm_image_data_format(input_dtype)
    scale = Net.engine_.get_input_scale(Net.graph_name_, input_names[0])
    # 0.003922 is approximately 1/255
    scale *= 0.003922
    Net.preprocessor_ = PreProcessor(Net.bmcv_, scale)

    # load postprocess so
    ll = ctypes.cdll.LoadLibrary
    Net.lib_post_process_ = ll('./libYoloPostProcess.so')

    # create the output directory without spawning a shell
    if not os.path.exists('result_imgs'):
        os.makedirs('result_imgs', exist_ok=True)
def __init__(self, bmodel_path, tpu_id, stage):
    """Load a bmodel on one TPU and prepare tensors, bmcv and post-processing.

    Everything is stored on the ``Net`` class itself, not on the instance.

    Args:
        bmodel_path: Path to bmodel
        tpu_id: ID of TPU to use
        stage: Not read anywhere in this constructor
    """
    # init Engine, then load bmodel without builtin input and output tensors
    Net.engine_ = sail.Engine(tpu_id)
    Net.engine_.load(bmodel_path)

    # get model info; only the first graph of the engine is used
    Net.handle_ = Net.engine_.get_handle()
    Net.graph_name_ = Net.engine_.get_graph_names()[0]
    input_names = Net.engine_.get_input_names(Net.graph_name_)
    print("input names:", input_names)
    input_dtype = 0
    Net.tpu_id_ = tpu_id
    Net.input_name_ = input_names[0]

    # input tensors are created with both trailing flags False
    for in_name in input_names:
        in_shape = Net.engine_.get_input_shape(Net.graph_name_, in_name)
        Net.input_shapes_[in_name] = in_shape
        input_dtype = Net.engine_.get_input_dtype(Net.graph_name_, in_name)
        Net.input_tensors_[in_name] = sail.Tensor(
            Net.handle_, Net.input_shapes_[in_name], input_dtype, False, False)
        Net.input_dtype_ = input_dtype

    # output tensors are created with both trailing flags True
    Net.output_names_ = Net.engine_.get_output_names(Net.graph_name_)
    for out_name in Net.output_names_:
        out_shape = Net.engine_.get_output_shape(Net.graph_name_, out_name)
        Net.output_shapes_[out_name] = out_shape
        out_dtype = Net.engine_.get_output_dtype(Net.graph_name_, out_name)
        Net.output_tensors_[out_name] = sail.Tensor(
            Net.handle_, Net.output_shapes_[out_name], out_dtype, True, True)

    print("input shapes:", Net.input_shapes_)
    print("output shapes:", Net.output_shapes_)

    # set io_mode
    Net.engine_.set_io_mode(Net.graph_name_, sail.IOMode.SYSIO)
    Net.bmcv_ = sail.Bmcv(Net.handle_)
    # uses the dtype of the last input queried above
    Net.img_dtype_ = Net.bmcv_.get_bm_image_data_format(input_dtype)

    scale = Net.engine_.get_input_scale(Net.graph_name_, input_names[0])
    print("scale", scale)
    scale *= 0.003922
    Net.preprocessor_ = PreProcessor(Net.bmcv_, scale)

    # load postprocess so and call its hello entry point
    load_library = ctypes.cdll.LoadLibrary
    Net.lib_post_process_ = load_library('./post_process_lib/libPostProcess.so')
    Net.lib_post_process_.post_process_hello()
def inference(bmodel_path, input_path, loops, tpu_id, compare_path):
    """Load a bmodel and classify one image repeatedly.

    Args:
        bmodel_path: Path to bmodel
        input_path: Path to input image
        loops: Number of loops to run
        tpu_id: ID of TPU to use
        compare_path: Path to correct result file

    Returns:
        True for success and False for failure.
    """
    # init Engine to load bmodel and allocate input and output tensors
    engine = sail.Engine(bmodel_path, tpu_id, sail.SYSIO)

    # get model info; only the first graph, input and output are used
    graph_name = engine.get_graph_names()[0]
    input_name = engine.get_input_names(graph_name)[0]
    output_name = engine.get_output_names(graph_name)[0]
    out_dtype = engine.get_output_dtype(graph_name, output_name)

    reference = get_reference(compare_path)
    # the reference entry to compare against depends on the output dtype
    compare_type = 'fp32_top5' if out_dtype == sail.BM_FLOAT32 else 'int8_top5'

    # pipeline of inference
    for i in range(loops):
        # read image and preprocess
        image = preprocess(input_path).astype(np.float32)
        # inference with fp32 input and output
        # data scale(input: fp32 to int8, output: int8 to fp32) is done inside
        # for int8 model
        output = engine.process(graph_name, {input_name: image})
        # postprocess
        result = postprocess(output[output_name])
        top5 = result[1]['top5_idx'][0]
        # print result
        print("Top 5 of loop {} on tpu {}: {}".format(i, tpu_id, top5))
        if not compare(reference, top5, compare_type):
            return False
    return True
def inference(bmodel_path, input_path, loops, tpu_id, compare_path):
    """Run the three MTCNN stages on one image, ``loops`` times.

    Args:
        bmodel_path: Path to bmodel
        input_path: Path to input file
        loops: Number of loops to run
        tpu_id: ID of TPU to use
        compare_path: Path to correct result file

    Returns:
        True for success and False for failure
    """
    def has_boxes(candidates):
        # a later stage only runs when the previous one produced boxes
        return candidates is not None and len(candidates) > 0

    # init Engine to load bmodel and allocate input and output tensors
    engine = sail.Engine(bmodel_path, tpu_id, sail.SYSIO)
    # init preprocessor and postprocessor
    preprocessor = PreProcessor([127.5, 127.5, 127.5], 0.0078125)
    postprocessor = PostProcessor([0.5, 0.3, 0.7])
    reference = postprocessor.get_reference(compare_path)

    # pipeline of inference
    for loop_idx in range(loops):
        # read image and transpose it
        image = cv2.transpose(cv2.imread(input_path))

        # run PNet, the first stage of MTCNN
        boxes = run_pnet(engine, preprocessor, postprocessor, image)
        if has_boxes(boxes):
            # run RNet, the second stage of MTCNN
            boxes = run_rnet(engine, preprocessor, postprocessor, boxes, image)
            if has_boxes(boxes):
                # run ONet, the third stage of MTCNN
                boxes = run_onet(engine, preprocessor, postprocessor, boxes,
                                 image)

        # a mismatch against the reference ends the run as a failure
        if not postprocessor.compare(reference, boxes, loop_idx):
            return False
        # print detected result
        print_result(boxes, tpu_id)
    return True
def __init__(self, bmodel_path, tpu_id):
    """Load a bmodel on one TPU and prepare tensors and the bmcv preprocessor.

    Everything is stored on the ``Net`` class itself, not on the instance,
    so the state is shared by all ``Net`` objects.

    Args:
        bmodel_path: Path to bmodel
        tpu_id: ID of TPU to use
    """
    # init Engine
    Net.engine_ = sail.Engine(tpu_id)
    # load bmodel without builtin input and output tensors
    Net.engine_.load(bmodel_path)

    # get model info; only the first graph of the engine is used
    Net.handle_ = Net.engine_.get_handle()
    Net.graph_name_ = Net.engine_.get_graph_names()[0]
    Net.input_names_ = Net.engine_.get_input_names(Net.graph_name_)
    input_dtype = 0
    Net.tpu_id_ = tpu_id

    for i, name in enumerate(Net.input_names_):
        Net.input_shapes_[name] = Net.engine_.get_input_shape(
            Net.graph_name_, name)
        input_dtype = Net.engine_.get_input_dtype(Net.graph_name_, name)
        # only the input at index 1 is created with both trailing flags True
        alloc_flag = (i == 1)
        input_tensor = sail.Tensor(Net.handle_, Net.input_shapes_[name],
                                   input_dtype, alloc_flag, alloc_flag)
        Net.input_tensors_[name] = input_tensor
        Net.input_dtype_ = input_dtype

    # output tensors are created with both trailing flags True
    Net.output_names_ = Net.engine_.get_output_names(Net.graph_name_)
    for name in Net.output_names_:
        Net.output_shapes_[name] = Net.engine_.get_output_shape(
            Net.graph_name_, name)
        output_dtype = Net.engine_.get_output_dtype(Net.graph_name_, name)
        output_tensor = sail.Tensor(Net.handle_, Net.output_shapes_[name],
                                    output_dtype, True, True)
        Net.output_tensors_[name] = output_tensor
    print(Net.input_shapes_)
    print(Net.output_shapes_)

    # set io_mode
    Net.engine_.set_io_mode(Net.graph_name_, sail.IOMode.SYSO)
    Net.bmcv_ = sail.Bmcv(Net.handle_)
    # uses the dtype of the last input queried above
    Net.img_dtype_ = Net.bmcv_.get_bm_image_data_format(input_dtype)
    scale = Net.engine_.get_input_scale(Net.graph_name_, Net.input_names_[0])
    # 0.003922 is approximately 1/255
    scale *= 0.003922
    Net.preprocessor_ = PreProcessor(Net.bmcv_, scale)

    # create the output directory without spawning a shell
    if not os.path.exists('result_imgs'):
        os.makedirs('result_imgs', exist_ok=True)
def load_models_from_memory(self, graphs_in_memory):
    """Load all submodels from buffers that were already read into memory.

    Args:
        graphs_in_memory: Mapping looked up with ``graph_<i>`` for cpu graphs
            and ``graph_<i>_bmodel`` for tpu graphs.

    Returns:
        List with one loaded model per graph, in graph order.

    Raises:
        RuntimeError: If a graph's device is neither "cpu" nor "tpu".
    """
    loaded = []
    for idx in range(self.graph_num):
        graph = self.graph_infos["graphs"][idx]
        device = graph["device"]

        if device == "cpu":
            key = "graph_{}".format(idx)
            assert key in graphs_in_memory
            loaded.append(
                self.load_graph_cpu_from_memory(graphs_in_memory[key]))
            continue

        if device == "tpu":
            key = "graph_{}_bmodel".format(idx)
            assert key in graphs_in_memory
            blob = graphs_in_memory[key]
            # the engine is given the buffer, its length and TPU id 0
            loaded.append(sail.Engine(blob, len(blob), 0, sail.IOMode.SYSIO))
            continue

        raise RuntimeError('wrong device: {0}!!!'.format(graph['device']))
    return loaded
def load_models(self):
    """Load all submodels.

    cpu graphs always go through ``load_graph_cpu``. tpu graphs go through
    ``load_graph_cpu`` when ``self.mode`` is 0, ``load_graph_gpu`` when it
    is 1, and otherwise are loaded as a ``sail.Engine`` from
    ``compilation.bmodel`` inside the graph's context directory.

    Returns:
        List with one loaded model per graph, in graph order.

    Raises:
        RuntimeError: If a graph's device is neither "cpu" nor "tpu".
    """
    loaded = []
    for idx in range(self.graph_num):
        graph = self.graph_infos["graphs"][idx]
        device = graph["device"]
        if device not in ("cpu", "tpu"):
            raise RuntimeError('wrong device: {0}!!!'.format(graph['device']))

        # self.mode is only consulted for tpu graphs
        if device == "cpu" or self.mode == 0:
            model = self.load_graph_cpu(graph['model_info'])
        elif self.mode == 1:
            model = self.load_graph_gpu(graph['model_info'])
        else:
            context_dir = str(os.path.join(self.folder, graph["context_dir"]))
            bmodel_path = os.path.join(context_dir, 'compilation.bmodel')
            # the engine is always created on TPU id 0
            model = sail.Engine(bmodel_path, 0, sail.IOMode.SYSIO)
        loaded.append(model)
    return loaded
def main():
    """Run inference of multiple models in multiple threads on one TPU.

    A single engine is created for ``ARGS.tpu_id``. One thread per bmodel
    first loads the models, then one thread per model runs inference. The
    process exits with 0 when every slot of the shared status list is truthy
    after all inference threads have joined, and with -1 otherwise.
    """
    # init Engine
    engine = sail.Engine(ARGS.tpu_id)
    model_count = len(ARGS.bmodel)

    # create threads for loading bmodel, you can also load in main thread;
    # bmodels are loaded without builtin input and output tensors, each
    # thread manages its own input and output tensors
    threads = [
        threading.Thread(target=thread_load,
                         args=(idx, engine, ARGS.bmodel[idx]))
        for idx in range(model_count)
    ]
    for loader in threads:
        loader.start()
    for loader in threads:
        loader.join()
    threads.clear()

    status = [None] * model_count
    graph_names = engine.get_graph_names()

    # create threads for inference, and each thread processes a model
    threads.extend(
        threading.Thread(
            target=thread_infer,
            args=(idx, graph_names[idx], engine, ARGS.input, ARGS.loops,
                  ARGS.compare, status))
        for idx in range(model_count))
    for worker in threads:
        worker.start()
    for worker in threads:
        worker.join()

    # check status
    sys.exit(0 if all(status) else -1)
def bm1682_init(self):
    """Create the engine used for bm1682 inference.

    Returns:
        A ``sail.Engine`` built from ``self.context_path`` and ``self.tpus``
        with ``sail.IOMode.SYSIO``.
    """
    return sail.Engine(self.context_path, self.tpus, sail.IOMode.SYSIO)
def inference(bmodel_path, input_path, loops, tpu_id, compare_path):
    """Decode a video four frames at a time and run batched detection.

    Args:
        bmodel_path: Path to bmodel
        input_path: Path to input file
        loops: Number of loops to run
        tpu_id: ID of TPU to use
        compare_path: Path to correct result file

    Returns:
        True for success and False for failure
    """
    # init Engine, then load bmodel without builtin input and output tensors
    engine = sail.Engine(tpu_id)
    engine.load(bmodel_path)

    # get model info; only the first graph, input and output are used
    graph_name = engine.get_graph_names()[0]
    input_name = engine.get_input_names(graph_name)[0]
    output_name = engine.get_output_names(graph_name)[0]
    input_shape = [4, 3, 300, 300]
    input_shapes = {input_name: input_shape}
    output_shape = [1, 1, 800, 7]
    input_dtype = engine.get_input_dtype(graph_name, input_name)
    output_dtype = engine.get_output_dtype(graph_name, output_name)
    is_fp32 = (input_dtype == sail.Dtype.BM_FLOAT32)

    # get handle to create input and output tensors
    handle = engine.get_handle()
    in_tensor = sail.Tensor(handle, input_shape, input_dtype, False, False)
    out_tensor = sail.Tensor(handle, output_shape, output_dtype, True, True)
    input_tensors = {input_name: in_tensor}
    output_tensors = {output_name: out_tensor}

    # set io_mode
    engine.set_io_mode(graph_name, sail.IOMode.SYSO)

    # init bmcv for preprocess
    bmcv = sail.Bmcv(handle)
    img_dtype = bmcv.get_bm_image_data_format(input_dtype)

    # init preprocessor and postprocessor
    scale = engine.get_input_scale(graph_name, input_name)
    preprocessor = PreProcessor(bmcv, scale)
    threshold = 0.59 if is_fp32 else 0.52
    postprocessor = PostProcess(threshold)
    reference = postprocessor.get_reference(compare_path)

    # init decoder
    decoder = sail.Decoder(input_path, True, tpu_id)

    msg = ('[Frame {} on tpu {}] Category: {}, Score: {:.3f},'
           ' Box: [{}, {}, {}, {}]')
    # pipeline of inference
    for i in range(loops):
        imgs_0 = sail.BMImageArray4D()
        imgs_1 = sail.BMImageArray4D(handle, input_shape[2], input_shape[3],
                                     sail.Format.FORMAT_BGR_PLANAR, img_dtype)

        # read 4 frames from input video for batch size is 4; any() stops at
        # the first read that returns non-zero, so no further frame is read
        read_failed = any(
            decoder.read_(handle, imgs_0[j]) != 0 for j in range(4))
        if read_failed:
            print("Finished to read the video!")
            break

        # preprocess
        preprocessor.process(imgs_0, imgs_1)
        bmcv.bm_image_to_tensor(imgs_1, in_tensor)
        # inference
        engine.process(graph_name, input_tensors, input_shapes, output_tensors)
        # postprocess
        real_output_shape = engine.get_output_shape(graph_name, output_name)
        out = out_tensor.asnumpy(real_output_shape)
        dets = postprocessor.process(out, imgs_0[0].width(),
                                     imgs_0[0].height())

        # a mismatch against the reference ends the run as a failure
        if not postprocessor.compare(reference, dets, i):
            return False

        # print result and save one annotated image per frame in dets
        for j, vals in dets.items():
            frame_id = int(i * 4 + j + 1)
            img0 = sail.BMImage(imgs_0[j])
            for class_id, score, x0, y0, x1, y1 in vals:
                print(msg.format(frame_id, tpu_id, class_id, score,
                                 x0, y0, x1, y1))
                # rectangle is given as x, y, width, height
                bmcv.rectangle(img0, x0, y0, x1 - x0 + 1, y1 - y0 + 1,
                               (255, 0, 0), 3)
            bmcv.imwrite('result-{}.jpg'.format(frame_id), img0)
    return True
def inference(bmodel_path, input_path, loops, tpu_id, compare_path):
    """Run detection on an image file or on frames of a video file.

    The input is treated as an image when the text after its last dot is one
    of jpg, JPG, jpeg or JPEG; anything else is opened as a video.

    Args:
        bmodel_path: Path to bmodel
        input_path: Path to input file
        loops: Number of loops to run
        tpu_id: ID of TPU to use
        compare_path: Path to correct result file

    Returns:
        True for success and False for failure
    """
    # init Engine and load bmodel
    engine = sail.Engine(bmodel_path, tpu_id, sail.IOMode.SYSIO)

    # get model info; only the first graph, input and output are used
    graph_name = engine.get_graph_names()[0]
    input_name = engine.get_input_names(graph_name)[0]
    output_name = engine.get_output_names(graph_name)[0]
    input_dtype = engine.get_input_dtype(graph_name, input_name)
    is_fp32 = (input_dtype == sail.Dtype.BM_FLOAT32)
    scale = engine.get_input_scale(graph_name, input_name)

    # init preprocessor and postprocessor
    preprocessor = PreProcessor(scale)
    threshold = 0.59 if is_fp32 else 0.52
    postprocessor = PostProcessor(threshold)
    reference = postprocessor.get_reference(compare_path)

    extension = input_path.split('.')[-1]
    is_image = extension in ['jpg', 'JPG', 'jpeg', 'JPEG']
    # a capture is only opened for video input
    cap = None if is_image else cv2.VideoCapture(input_path)

    message = ('[Frame {} on tpu {}] Category: {}, Score: {:.3f},'
               ' Box: [{}, {}, {}, {}]')
    status = True
    # pipeline of inference
    for i in range(loops):
        # read an image from a image file or a video file
        if is_image:
            img0 = cv2.imread(input_path)
        else:
            ret, img0 = cap.read()
            if not ret:
                print("Finished to read the video!")
                break
        h, w, _ = img0.shape

        # preprocess
        data = preprocessor.process(img0)
        # inference
        input_tensors = {input_name: np.array([data], dtype=np.float32)}
        output = engine.process(graph_name, input_tensors)
        # postprocess
        dets = postprocessor.process(output[output_name], w, h)

        # a mismatch against the reference ends the run as a failure
        if not postprocessor.compare(reference, dets, i):
            status = False
            break

        # print result and save the annotated frame
        for det in dets:
            class_id, score, x0, y0, x1, y1 = det
            print(message.format(i + 1, tpu_id, class_id, score,
                                 x0, y0, x1, y1))
            cv2.rectangle(img0, (x0, y0), (x1, y1), (255, 0, 0), 3)
        cv2.imwrite('result-{}.jpg'.format(i + 1), img0)

    if cap is not None:
        cap.release()
    return status