def gen_tensor_desc(self, temp_buffer, temp_width, temp_height,
                    flag=True, need_malloc=True):
    """Create a DVPP picture description and, optionally, its buffer.

    Args:
        temp_buffer: Buffer attached to the description; replaced by a
            newly allocated one when ``need_malloc`` is true.
        temp_width: Picture width stored in the description.
        temp_height: Picture height stored in the description.
        flag: When true, the width stride is ``temp_width`` rounded up to
            a multiple of 128 and the height stride is ``temp_height``
            rounded up to a multiple of 16. When false, the strides equal
            the given size, which must be even in both dimensions.
        need_malloc: When true, allocate the buffer with
            ``acl.media.dvpp_malloc``.

    Returns:
        Tuple ``(pic_desc, buffer, buffer_size)`` where ``buffer_size`` is
        ``width_stride * height_stride * 3 // 2``.

    Raises:
        Exception: If ``flag`` is false and the width or height is odd.
    """
    if flag:
        stride_w = ((temp_width + 127) // 128) * 128
        stride_h = ((temp_height + 15) // 16) * 16
    elif temp_height % 2 or temp_width % 2:
        raise Exception(
            "[Dvpp] output width={} or height={} is odd".format(
                temp_width, temp_height))
    else:
        stride_w, stride_h = temp_width, temp_height

    buffer_size = stride_w * stride_h * 3 // 2
    if need_malloc:
        temp_buffer, status = acl.media.dvpp_malloc(buffer_size)
        check_ret("acl.media.dvpp_malloc", status)

    pic_desc = acl.media.dvpp_create_pic_desc()
    # Fill in the attributes of the picture description, in a fixed order.
    media = acl.media
    for setter, value in (
            (media.dvpp_set_pic_desc_data, temp_buffer),
            (media.dvpp_set_pic_desc_format, self._format),
            (media.dvpp_set_pic_desc_width, temp_width),
            (media.dvpp_set_pic_desc_height, temp_height),
            (media.dvpp_set_pic_desc_width_stride, stride_w),
            (media.dvpp_set_pic_desc_height_stride, stride_h),
            (media.dvpp_set_pic_desc_size, buffer_size)):
        setter(pic_desc, value)
    return pic_desc, temp_buffer, buffer_size
def _print_result(self, infer_output):
    """Copy each model output to the host and print its five largest values.

    For every buffer in ``infer_output`` the data is copied into a
    temporary host buffer, interpreted as float32 with the output's
    current dimensions, flattened, and the five largest entries are
    printed together with their sum (capped at 1 in the summary line).

    Args:
        infer_output: Output dataset handle produced by model execution.
    """
    num = acl.mdl.get_dataset_num_buffers(infer_output)
    for i in range(num):
        dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i)
        check_ret("acl.mdl.get_cur_output_dims", ret)
        out_dim = dims['dims']

        temp_output_buffer = acl.mdl.get_dataset_buffer(infer_output, i)
        infer_output_ptr = acl.get_data_buffer_addr(temp_output_buffer)
        infer_output_size = acl.get_data_buffer_size_v2(temp_output_buffer)

        # Check the allocation result instead of discarding it.
        output_host, ret = acl.rt.malloc_host(infer_output_size)
        check_ret("acl.rt.malloc_host", ret)
        try:
            ret = acl.rt.memcpy(output_host, infer_output_size,
                                infer_output_ptr, infer_output_size,
                                ACL_MEMCPY_DEVICE_TO_HOST)
            check_ret("acl.rt.memcpy", ret)

            if "ptr_to_bytes" in dir(acl.util):
                bytes_data = acl.util.ptr_to_bytes(output_host,
                                                   infer_output_size)
                result = np.frombuffer(bytes_data, dtype=np.float32).reshape(
                    tuple(out_dim))
            else:
                result = acl.util.ptr_to_numpy(output_host, tuple(out_dim),
                                               NPY_FLOAT32)

            # np.array() copies, so nothing below depends on the host buffer.
            vals = np.array(result).flatten()
            top_k = vals.argsort()[-1:-6:-1]
            possible = 0
            print("\n========= top5 inference results: =========")
            for j in top_k:
                print("label:%d prob: %f" % (j, vals[j]))
                possible += vals[j]
            print("result: class_label[{}],top1[{:f}],top5[{:f}] ".format(
                top_k[0], vals[top_k[0]], possible if possible < 1 else 1))
        finally:
            # Release the host buffer even when copying or decoding fails.
            ret = acl.rt.free_host(output_host)
            check_ret("acl.rt.free_host", ret)
def run(self, video_path):
    """Decode one input stream: start the callback thread, send frames, tear down.

    Args:
        video_path: 4-tuple unpacked into ``self.video_path``,
            ``self.input_width``, ``self.input_height`` and ``self.dtype``.
    """
    self.video_path, self.input_width, self.input_height, \
        self.dtype = video_path

    # Timeout value handed to the callback thread function.
    timeout = 100
    cb_thread_id, ret = acl.util.start_thread(self._thread_func, [timeout])
    # Stop early if the callback thread could not be started.
    check_ret("acl.util.start_thread", ret)
    self.init_resource(cb_thread_id)

    # The output size uses the width rounded up to a multiple of 16 and
    # the height rounded up to a multiple of 2.
    vdec_out_height = int(int((self.input_height + 1) / 2) * 2)
    vdec_out_width = int(int((self.input_width + 15) / 16) * 16)
    output_pic_size = vdec_out_width * vdec_out_height * 3 // 2

    # input_stream_size: size returned for the stream read from the file.
    # input_stream_mem: pointer returned by _gen_input_dataset for that data.
    self.input_stream_mem, input_stream_size = \
        self._gen_input_dataset(self.video_path)

    self.forward(output_pic_size, input_stream_size)

    ret = acl.media.vdec_destroy_channel(self.vdec_channel_desc)
    check_ret("acl.media.vdec_destroy_channel", ret)

    self._vdec_exit = False
    ret = acl.util.stop_thread(cb_thread_id)
    check_ret("acl.util.stop_thread", ret)
    print("[Vdec] vdec finish!!!\n")
def init_resource_arg_max(self):
    """Prepare the attribute, tensor descriptions and output buffer for ArgMaxD.

    Stores the operator name, an attribute object with ``dimension`` set to
    0, a float16 input description, an int32 output description, and a
    device buffer (wrapped in a data buffer) sized for the output.
    """
    self.op_type_name = "ArgMaxD"

    attr = acl.op.create_attr()
    self.op_attr = attr
    status = acl.op.set_attr_int(attr, "dimension", 0)
    check_ret("acl.op.set_attr_int", status)

    # Tensor descriptions of the arg_max operator.
    self.input_desc_arg_max_d = acl.create_tensor_desc(
        ACL_FLOAT16, [self.input_shape, ], ACL_FORMAT_ND)
    out_desc = acl.create_tensor_desc(
        ACL_INT32, [self.output_shape, ], ACL_FORMAT_ND)
    self.output_desc_arg_max_d = out_desc

    out_size = acl.get_tensor_desc_size(out_desc)
    self.tensor_size_arg_max_d = out_size

    dev_mem, status = acl.rt.malloc(out_size, ACL_MEM_MALLOC_NORMAL_ONLY)
    self.dev_buffer_arg_max_d = dev_mem
    check_ret("acl.rt.malloc", status)

    self.output_buffer_arg_max_d = acl.create_data_buffer(dev_mem, out_size)
def init_resource(self):
    """Create the DVPP channel and the resize configuration."""
    print("[Dvpp] class Dvpp init resource stage:")

    channel_desc = acl.media.dvpp_create_channel_desc()
    self._dvpp_channel_desc = channel_desc
    status = acl.media.dvpp_create_channel(channel_desc)
    check_ret("acl.media.dvpp_create_channel", status)

    self._resize_config = acl.media.dvpp_create_resize_config()

    print("[Dvpp] class Dvpp init resource stage success")
def forward(self, img_buffer, img_buffer_size, img_width, img_height):
    """Resize one picture to the model input size with VPC.

    Args:
        img_buffer: Buffer holding the input picture.
        img_buffer_size: Size of ``img_buffer``; must equal the size
            computed for the input description.
        img_width: Width of the input picture.
        img_height: Height of the input picture.

    Raises:
        Exception: If ``img_buffer_size`` differs from the size computed
            for the input description.
    """
    print('[Dvpp] vpc resize stage:')
    # The input comes from vdec, so the input stride is 16*2.
    self._resize_in_desc_, self._decode_out_buffer, _resize_in_size = \
        self.gen_tensor_desc(img_buffer, img_width, img_height,
                             need_malloc=False)

    # Validate the input before allocating the output buffer, so a
    # mismatching frame does not cost an allocation.
    if _resize_in_size != img_buffer_size:
        print("[Dvpp] _resize_in_size:{} img_buffer_size:{}".
              format(_resize_in_size, img_buffer_size))
        raise Exception("[Dvpp] Size doesn't match")

    # The resize output follows the vpc constraint: its stride is 16*2 too.
    self._resize_out_desc, self._resize_out_dev, self._resize_out_size = \
        self.gen_tensor_desc(self._resize_out_dev,
                             self._model_input_width,
                             self._model_input_height,
                             flag=False)

    ret = acl.media.dvpp_vpc_resize_async(self._dvpp_channel_desc,
                                          self._resize_in_desc_,
                                          self._resize_out_desc,
                                          self._resize_config,
                                          self.stream)
    check_ret("acl.media.dvpp_vpc_resize_async", ret)
    ret = acl.rt.synchronize_stream(self.stream)
    check_ret("acl.rt.synchronize_stream", ret)
    print('[Dvpp] vpc resize stage success')
def _set_input(self, input_stream_size):
    """Create the input stream description for the decoder.

    The description points at ``self.input_stream_mem`` and is stored in
    ``self.dvpp_stream_desc``.

    Args:
        input_stream_size: Size recorded in the stream description.
    """
    stream_desc = acl.media.dvpp_create_stream_desc()
    self.dvpp_stream_desc = stream_desc

    status = acl.media.dvpp_set_stream_desc_data(stream_desc,
                                                 self.input_stream_mem)
    check_ret("acl.media.dvpp_set_stream_desc_data", status)

    status = acl.media.dvpp_set_stream_desc_size(stream_desc,
                                                 input_stream_size)
    check_ret("acl.media.dvpp_set_stream_desc_size", status)

    print("[Vdec] create input stream desc success")
def __del__(self):
    """Release the datasets, unload the model and destroy its description."""
    self._release_dataset()

    model_id = self.model_id
    if model_id:
        status = acl.mdl.unload(model_id)
        check_ret("acl.mdl.unload", status)

    model_desc = self.model_desc
    if model_desc:
        status = acl.mdl.destroy_desc(model_desc)
        check_ret("acl.mdl.destroy_desc", status)

    print("[Model] The class Model releases resources successfully.")
def init_resource(self):
    """Load the model file, fetch its description and build the output dataset.

    Sets ``self.model_id`` and ``self.model_desc``, then calls
    ``self._gen_output_dataset`` with the model's number of outputs.
    """
    print("[Model] class Model init resource stage:")
    # Check the result of binding the context rather than discarding it.
    ret = acl.rt.set_context(self.context)
    check_ret("acl.rt.set_context", ret)

    self.model_id, ret = acl.mdl.load_from_file(self.model_path)
    check_ret("acl.mdl.load_from_file", ret)

    self.model_desc = acl.mdl.create_desc()
    ret = acl.mdl.get_desc(self.model_desc, self.model_id)
    check_ret("acl.mdl.get_desc", ret)

    output_size = acl.mdl.get_num_outputs(self.model_desc)
    self._gen_output_dataset(output_size)
    print("[Model] class Model init resource stage success")
def _gen_input_dataset(self, dvpp_output_buffer, dvpp_output_size):
    """Wrap the DVPP output in a one-buffer model input dataset.

    The dataset is stored in ``self.input_dataset``.

    Args:
        dvpp_output_buffer: Buffer holding the model input data.
        dvpp_output_size: Size of ``dvpp_output_buffer``.
    """
    print("[Model] create model input dataset:")
    self.input_dataset = acl.mdl.create_dataset()
    input_dataset_buffer = acl.create_data_buffer(dvpp_output_buffer,
                                                  dvpp_output_size)
    _, add_ret = acl.mdl.add_dataset_buffer(self.input_dataset,
                                            input_dataset_buffer)
    if add_ret:
        # The data buffer was not attached to the dataset: destroy it, then
        # report the add failure first because it is the root cause.
        destroy_ret = acl.destroy_data_buffer(input_dataset_buffer)
        check_ret("acl.mdl.add_dataset_buffer", add_ret)
        check_ret("acl.destroy_data_buffer", destroy_ret)
        # Never report success after a failed add.
        return
    print("[Model] create model input dataset success")
def _forward_op_cast(self):
    """Run the cast operator on ``self.stream`` and wait for it to finish.

    Reads ``self.input_buffer`` (described by ``self._input_desc``) and
    writes ``self.output_buffer_cast`` (described by ``self._output_desc``).
    """
    stream = self.stream

    status = acl.op.cast(self._input_desc,
                         self.input_buffer,
                         self._output_desc,
                         self.output_buffer_cast,
                         0,
                         stream)
    check_ret("acl.op.cast", status)

    status = acl.rt.synchronize_stream(stream)
    check_ret("acl.rt.synchronize_stream", status)

    print("[SingleOP] single op cast success")
def _release_dataset(self):
    """Destroy the data buffers and the datasets for model input and output.

    Only the data-buffer wrappers and the datasets are destroyed here; the
    memory the buffers point to is not freed by this method.
    """
    for dataset in (self.input_dataset, self.output_data):
        if dataset:
            count = acl.mdl.get_dataset_num_buffers(dataset)
            for index in range(count):
                buf = acl.mdl.get_dataset_buffer(dataset, index)
                if not buf:
                    continue
                status = acl.destroy_data_buffer(buf)
                check_ret("acl.destroy_data_buffer", status)
            status = acl.mdl.destroy_dataset(dataset)
            check_ret("acl.mdl.destroy_dataset", status)
def init_resource(self):
    """Load the model file, fetch its description and build the output dataset.

    Sets ``self.model_id`` and ``self.model_desc``, then calls
    ``self._gen_output_dataset`` with the model's number of outputs.
    """
    # Initialize the resources.
    print("[Model] The class Model initializes resources:")
    # Check the result of binding the context rather than discarding it.
    ret = acl.rt.set_context(self.context)
    check_ret("acl.rt.set_context", ret)

    self.model_id, ret = acl.mdl.load_from_file(self.model_path)
    check_ret("acl.mdl.load_from_file", ret)

    self.model_desc = acl.mdl.create_desc()
    ret = acl.mdl.get_desc(self.model_desc, self.model_id)
    check_ret("acl.mdl.get_desc", ret)

    output_size = acl.mdl.get_num_outputs(self.model_desc)
    self._gen_output_dataset(output_size)
    print("[Model] The class Model initializes resources successfully.")
def _forward_op_arg_max_d(self):
    """Execute the operator named by ``self.op_type_name`` and wait for it.

    The input is ``self.output_buffer_cast`` and the result is written to
    ``self.output_buffer_arg_max_d``.
    """
    stream = self.stream
    input_descs = [self.input_desc_arg_max_d]
    input_buffers = [self.output_buffer_cast]
    output_descs = [self.output_desc_arg_max_d]
    output_buffers = [self.output_buffer_arg_max_d]

    status = acl.op.execute_v2(self.op_type_name,
                               input_descs,
                               input_buffers,
                               output_descs,
                               output_buffers,
                               self.op_attr,
                               stream)
    check_ret("acl.op.execute_v2", status)

    status = acl.rt.synchronize_stream(stream)
    check_ret("acl.rt.synchronize_stream", status)

    print("[SingleOp] get top 1 label success")
def _gen_input_dataset(self, img_path):
    """Read a file and copy its contents into DVPP device memory.

    Args:
        img_path: Path of the file to read; it is parsed with
            ``self.dtype`` as the element type.

    Returns:
        Tuple ``(device_pointer, size_in_bytes)``.
    """
    img = np.fromfile(img_path, dtype=self.dtype)
    # Use the byte count: ``img.size`` counts elements and only equals the
    # byte count for one-byte dtypes.
    img_buffer_size = img.nbytes

    if "bytes_to_ptr" in dir(acl.util):
        bytes_data = img.tobytes()
        img_ptr = acl.util.bytes_to_ptr(bytes_data)
    else:
        img_ptr = acl.util.numpy_to_ptr(img)

    img_device, ret = acl.media.dvpp_malloc(img_buffer_size)
    # The allocation result used to be overwritten without being checked.
    check_ret("acl.media.dvpp_malloc", ret)

    ret = acl.rt.memcpy(img_device, img_buffer_size, img_ptr,
                        img_buffer_size, ACL_MEMCPY_HOST_TO_DEVICE)
    if ret:
        # The caller never receives the pointer on failure, so free it here.
        acl.media.dvpp_free(img_device)
    check_ret("acl.rt.memcpy", ret)
    return img_device, img_buffer_size
def __del__(self):
    """Release the descriptions, attribute, device memory and data buffers.

    Each resource is released only when its attribute is truthy.
    """
    # Tensor descriptions of the cast operator.
    for name in ("_input_desc", "_output_desc"):
        desc = getattr(self, name)
        if desc:
            acl.destroy_tensor_desc(desc)

    if self.dev_buffer_cast:
        status = acl.rt.free(self.dev_buffer_cast)
        check_ret("acl.rt.free", status)

    # Tensor descriptions of the arg_max operator.
    for name in ("input_desc_arg_max_d", "output_desc_arg_max_d"):
        desc = getattr(self, name)
        if desc:
            acl.destroy_tensor_desc(desc)

    if self.op_attr:
        acl.op.destroy_attr(self.op_attr)

    if self.dev_buffer_arg_max_d:
        status = acl.rt.free(self.dev_buffer_arg_max_d)
        check_ret("acl.rt.free", status)

    # Data-buffer wrappers are destroyed last.
    for name in ("output_buffer_cast", "output_buffer_arg_max_d"):
        data_buffer = getattr(self, name)
        if data_buffer:
            status = acl.destroy_data_buffer(data_buffer)
            check_ret("acl.destroy_data_buffer", status)

    print("[SingOp] class SingOp release source success")
def __del__(self):
    """Release the DVPP buffers, picture descriptions, config and channel.

    Each resource is released only when its attribute is truthy. The first
    three attributes are reset to ``None`` after release.
    """
    in_buffer = self._decode_out_buffer
    if in_buffer:
        acl.media.dvpp_free(in_buffer)
        self._decode_out_buffer = None

    in_desc = self._resize_in_desc_
    if in_desc:
        acl.media.dvpp_destroy_pic_desc(in_desc)
        self._resize_in_desc_ = None

    out_desc = self._resize_out_desc
    if out_desc:
        acl.media.dvpp_destroy_pic_desc(out_desc)
        self._resize_out_desc = None

    if self._resize_config:
        status = acl.media.dvpp_destroy_resize_config(self._resize_config)
        check_ret("acl.media.dvpp_destroy_resize_config", status)

    channel_desc = self._dvpp_channel_desc
    if channel_desc:
        # Destroy the channel first, then its description.
        status = acl.media.dvpp_destroy_channel(channel_desc)
        check_ret("acl.media.dvpp_destroy_channel", status)
        status = acl.media.dvpp_destroy_channel_desc(channel_desc)
        check_ret("acl.media.dvpp_destroy_channel_desc", status)

    if self._resize_out_dev:
        status = acl.media.dvpp_free(self._resize_out_dev)
        check_ret("acl.media.dvpp_free", status)

    print("[Dvpp] class Dvpp release source success")
def init_resource_cast(self):
    """Prepare the tensor descriptions and output buffer of the cast operator.

    Creates a float input description and a float16 output description,
    both shaped ``[self.input_shape]``, plus a device buffer (wrapped in a
    data buffer) sized for the output.
    """
    shape = [self.input_shape]

    # Tensor descriptions of the cast operator.
    self._input_desc = acl.create_tensor_desc(ACL_FLOAT, shape, ACL_FORMAT_ND)
    out_desc = acl.create_tensor_desc(ACL_FLOAT16, shape, ACL_FORMAT_ND)
    self._output_desc = out_desc

    out_size = acl.get_tensor_desc_size(out_desc)
    dev_mem, status = acl.rt.malloc(out_size, ACL_MEM_MALLOC_NORMAL_ONLY)
    self.dev_buffer_cast = dev_mem
    check_ret("acl.rt.malloc", status)

    self.output_buffer_cast = acl.create_data_buffer(dev_mem, out_size)
def forward(self, output_pic_size, input_stream_size):
    """Send ``self.rest_len`` frames to the decoder channel.

    A new input stream description and output picture description are
    created for every frame before it is sent.

    Args:
        output_pic_size: Size of each decoded output picture buffer.
        input_stream_size: Size of the input stream data.
    """
    self.frame_config = acl.media.vdec_create_frame_config()

    for index in range(self.rest_len):
        print("[Vdec] forward index:{}".format(index))
        self._set_input(input_stream_size)
        self._set_pic_output(output_pic_size)

        # Hand the frame to the decoder.
        status = acl.media.vdec_send_frame(
            self.vdec_channel_desc,
            self.dvpp_stream_desc,
            self.dvpp_pic_desc,
            self.frame_config,
            None)
        check_ret("acl.media.vdec_send_frame", status)
        print('[Vdec] vdec_send_frame stage success')
def _gen_output_dataset(self, size):
    """Allocate one device buffer per model output and collect them in a dataset.

    The finished dataset is stored in ``self.output_data``.

    Args:
        size: Number of model outputs to allocate buffers for.
    """
    print("[Model] create output dataset:")
    dataset = acl.mdl.create_dataset()
    for i in range(size):
        temp_buffer_size = acl.mdl.get_output_size_by_index(
            self.model_desc, i)
        temp_buffer, ret = acl.rt.malloc(temp_buffer_size,
                                         ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)

        dataset_buffer = acl.create_data_buffer(temp_buffer,
                                                temp_buffer_size)
        _, add_ret = acl.mdl.add_dataset_buffer(dataset, dataset_buffer)
        if add_ret:
            # The buffer was not attached to the dataset, so nothing else
            # will release it: destroy the wrapper and free the memory,
            # then report the add failure first because it is the root cause.
            destroy_ret = acl.destroy_data_buffer(dataset_buffer)
            free_ret = acl.rt.free(temp_buffer)
            check_ret("acl.mdl.add_dataset_buffer", add_ret)
            check_ret("acl.destroy_data_buffer", destroy_ret)
            check_ret("acl.rt.free", free_ret)
    self.output_data = dataset
    print("[Model] create output dataset success")
def _destroy_resource(self):
    """Free the input stream memory and destroy the decoder channel objects.

    Release order: input stream memory, channel, channel description,
    frame configuration. Every return code goes through ``check_ret``.
    """
    print("[Vdec] release resource:")

    release_steps = (
        ("acl.media.dvpp_free",
         acl.media.dvpp_free, "input_stream_mem"),
        ("acl.media.vdec_destroy_channel",
         acl.media.vdec_destroy_channel, "vdec_channel_desc"),
        ("acl.media.vdec_destroy_channel_desc",
         acl.media.vdec_destroy_channel_desc, "vdec_channel_desc"),
        ("acl.media.vdec_destroy_frame_config",
         acl.media.vdec_destroy_frame_config, "frame_config"),
    )
    for api_name, release, attr_name in release_steps:
        status = release(getattr(self, attr_name))
        check_ret(api_name, status)

    print("[Vdec] release resource success")
def _resize_process(self, img_width, img_height):
    """Resize the picture in ``self._decode_out_dev_buffer`` to the model input size.

    Args:
        img_width: Width of the input picture.
        img_height: Height of the input picture.
    """
    print('[Dvpp] vpc resize stage:')

    # Input description over the existing buffer; nothing is allocated.
    in_desc, in_buffer, _resize_in_size = self.gen_tensor_desc(
        self._decode_out_dev_buffer, img_width, img_height,
        need_malloc=False)
    self._resize_input_desc_ = in_desc
    self._decode_out_buffer = in_buffer

    # Output description with the model input size.
    out_desc, out_buffer, out_size = self.gen_tensor_desc(
        self._resize_out_dev,
        self._model_input_width,
        self._model_input_height,
        flag=False)
    self._resize_out_desc = out_desc
    self._resize_out_dev = out_buffer
    self._resize_out_size = out_size

    status = acl.media.dvpp_vpc_resize_async(self._dvpp_channel_desc,
                                             self._resize_input_desc_,
                                             self._resize_out_desc,
                                             self._resize_config,
                                             self.stream)
    check_ret("acl.media.dvpp_vpc_resize_async", status)

    status = acl.rt.synchronize_stream(self.stream)
    check_ret("acl.rt.synchronize_stream", status)

    print('[Dvpp] vpc resize stage success')
def gen_tensor_desc(self, temp_buffer, temp_width, temp_height,
                    flag=True, need_malloc=True):
    """Create a DVPP picture description and, optionally, its buffer.

    Args:
        temp_buffer: Buffer attached to the description; replaced by a
            newly allocated one when ``need_malloc`` is true.
        temp_width: Picture width stored in the description.
        temp_height: Picture height stored in the description.
        flag: When true, the width stride is ``temp_width`` rounded up to
            a multiple of 16 and the height stride is ``temp_height``
            rounded up to a multiple of 2. When false, the strides equal
            the given size, which must be even in both dimensions.
        need_malloc: When true, allocate the buffer with
            ``acl.media.dvpp_malloc``.

    Returns:
        Tuple ``(pic_desc, buffer, buffer_size)`` where ``buffer_size`` is
        the width stride times the height stride times 3, halved.

    Raises:
        Exception: If ``flag`` is false and the width or height is odd.
    """
    if not flag:
        if temp_height % 2 or temp_width % 2:
            raise Exception(
                "[Dvpp] width={} or height={} of output is odd".format(
                    temp_width, temp_height))
        width_stride = temp_width
        height_stride = temp_height
    else:
        # The input comes from vdec, so its stride is 16*2; the resize
        # output follows the vpc constraint and uses a 16*2 stride too.
        width_stride = int(int((temp_width + 15) / 16) * 16)
        height_stride = int(int((temp_height + 1) / 2) * 2)

    buffer_size = int(int(width_stride * height_stride * 3) / 2)

    if need_malloc:
        temp_buffer, status = acl.media.dvpp_malloc(buffer_size)
        check_ret("acl.media.dvpp_malloc", status)

    pic_desc = acl.media.dvpp_create_pic_desc()
    acl.media.dvpp_set_pic_desc_data(pic_desc, temp_buffer)
    acl.media.dvpp_set_pic_desc_format(pic_desc, self._format)
    acl.media.dvpp_set_pic_desc_width(pic_desc, temp_width)
    acl.media.dvpp_set_pic_desc_height(pic_desc, temp_height)
    acl.media.dvpp_set_pic_desc_width_stride(pic_desc, width_stride)
    acl.media.dvpp_set_pic_desc_height_stride(pic_desc, height_stride)
    acl.media.dvpp_set_pic_desc_size(pic_desc, buffer_size)

    return pic_desc, temp_buffer, buffer_size
def _set_pic_output(self, output_pic_size):
    """Allocate an output picture buffer and describe it for the decoder.

    The description is stored in ``self.dvpp_pic_desc``. Its width stride
    is the input width rounded up to a multiple of 16 and its height
    stride is the input height rounded up to a multiple of 2.

    Args:
        output_pic_size: Size of the output picture buffer to allocate.
    """
    # Buffer that the picture description points at.
    pic_mem, status = acl.media.dvpp_malloc(output_pic_size)
    check_ret("acl.media.dvpp_malloc", status)

    pic_desc = acl.media.dvpp_create_pic_desc()
    self.dvpp_pic_desc = pic_desc

    acl.media.dvpp_set_pic_desc_height(pic_desc, self.input_height)
    acl.media.dvpp_set_pic_desc_width(pic_desc, self.input_width)

    height_stride = int(int((self.input_height + 1) / 2) * 2)
    width_stride = int(int((self.input_width + 15) / 16) * 16)
    acl.media.dvpp_set_pic_desc_width_stride(pic_desc, width_stride)
    acl.media.dvpp_set_pic_desc_height_stride(pic_desc, height_stride)

    acl.media.dvpp_set_pic_desc_data(pic_desc, pic_mem)
    acl.media.dvpp_set_pic_desc_size(pic_desc, output_pic_size)
    acl.media.dvpp_set_pic_desc_format(pic_desc, self._format)

    print("[Vdec] create output pic desc success")
def forward(self, temp):
    """Decode the input, then resize and run the model on every decoded image.

    Args:
        temp: 4-tuple whose second and third items are the input width and
            height; the whole tuple is passed to ``self.vdec_process.run``.
    """
    _, input_width, input_height, _ = temp

    # Vdec stage: the input is an h264 file, and the vdec output size is
    # computed from the width and height strided by 16*2.
    self.vdec_process.run(temp)
    decoded_images = self.vdec_process.get_image_buffer()
    if not decoded_images:
        return

    for decoded in decoded_images:
        img_device, img_buffer_size = self._transfer_to_device(decoded)
        print("vdec output, img_buffer_size = ", img_buffer_size)

        # Vpc stage: takes the vdec output buffer and size plus the
        # original picture width and height.
        dvpp_output_buffer, dvpp_output_size = self.dvpp_process.run(
            img_device, img_buffer_size, input_width, input_height)

        status = acl.media.dvpp_free(img_device)
        check_ret("acl.media.dvpp_free", status)

        self.model_process.run(dvpp_output_buffer, dvpp_output_size)
def __del__(self):
    """Release the datasets, device outputs, model and model description."""
    self._release_dataset()

    # Free the device output buffers, last one first, until none remain.
    pending = self.device_outputs
    while pending:
        status = acl.rt.free(pending.pop())
        check_ret("acl.rt.free", status)
        pending = self.device_outputs

    model_id = self.model_id
    if model_id:
        status = acl.mdl.unload(model_id)
        check_ret("acl.mdl.unload", status)

    model_desc = self.model_desc
    if model_desc:
        status = acl.mdl.destroy_desc(model_desc)
        check_ret("acl.mdl.destroy_desc", status)

    print("[Model] class Model release source success")
def init_resource(self):
    """Initialize ACL, select the device, and create a context and a stream.

    Stores the new handles in ``self.context`` and ``self.stream``.
    """
    print("init resource stage:")

    # NOTE(review): the return value of acl.init() is not checked here.
    acl.init()

    status = acl.rt.set_device(self.device_id)
    check_ret("acl.rt.set_device", status)

    context, status = acl.rt.create_context(self.device_id)
    self.context = context
    check_ret("acl.rt.create_context", status)

    stream, status = acl.rt.create_stream()
    self.stream = stream
    check_ret("acl.rt.create_stream", status)

    print("init resource stage success")
def _release_dataset(self):
    """Free the memory, data buffers and datasets for model input and output.

    For every buffer in each dataset, the memory at the buffer's address
    is freed with ``acl.rt.free`` before the data buffer is destroyed.
    """
    for dataset in (self.input_dataset, self.output_data):
        if dataset:
            # Get the number of data buffers and release them one by one.
            count = acl.mdl.get_dataset_num_buffers(dataset)
            for index in range(count):
                buf = acl.mdl.get_dataset_buffer(dataset, index)
                if not buf:
                    continue
                address = acl.get_data_buffer_addr(buf)
                status = acl.rt.free(address)
                check_ret("acl.rt.free", status)
                status = acl.destroy_data_buffer(buf)
                check_ret("acl.destroy_data_buffer", status)
            status = acl.mdl.destroy_dataset(dataset)
            check_ret("acl.mdl.destroy_dataset", status)
def release_resource(self):
    """Drop the process objects, then destroy the stream, context and device.

    The three process attributes are deleted when truthy. The stream and
    context are destroyed when truthy. The device is always reset and ACL
    is always finalized.
    """
    print('[Sample] release source stage:')

    # Delete the attributes so the process objects can be released.
    for attr_name in ("dvpp_process", "model_process", "vdec_process"):
        if getattr(self, attr_name):
            delattr(self, attr_name)

    stream = self.stream
    if stream:
        status = acl.rt.destroy_stream(stream)
        check_ret("acl.rt.destroy_stream", status)

    context = self.context
    if context:
        status = acl.rt.destroy_context(context)
        check_ret("acl.rt.destroy_context", status)

    status = acl.rt.reset_device(self.device_id)
    check_ret("acl.rt.reset_device", status)

    status = acl.finalize()
    check_ret("acl.finalize", status)

    print('[Sample] release source stage success')
def _print_label(self):
    """Copy the arg_max result to the host and print its first element.

    The device buffer ``self.dev_buffer_arg_max_d`` is copied into a
    temporary host buffer, which is freed again after printing.
    """
    size = self.tensor_size_arg_max_d

    host_mem, status = acl.rt.malloc_host(size)
    check_ret("acl.rt.malloc_host", status)

    status = acl.rt.memcpy(host_mem, size,
                           self.dev_buffer_arg_max_d, size,
                           ACL_MEMCPY_DEVICE_TO_HOST)
    check_ret("acl.rt.memcpy", status)

    if "ptr_to_bytes" not in dir(acl.util):
        # NOTE(review): 5 is presumably the numpy type code for int32 --
        # confirm against the constants used elsewhere in the file.
        labels = acl.util.ptr_to_numpy(host_mem, (self.output_shape,), 5)
    else:
        raw = acl.util.ptr_to_bytes(host_mem, size)
        labels = np.frombuffer(raw, dtype=np.int32)

    print("[SingleOP][ArgMaxOp] label of classification result is:{}"
          .format(labels[0]))

    status = acl.rt.free_host(host_mem)
    check_ret("acl.rt.free_host", status)