def _gen_input_dataset(self, input_list):
    """Build the model input dataset from a list of input items.

    Each item is converted by ``self._parse_input_data`` into a
    ``(data, size)`` pair, wrapped in an ACL data buffer and appended to a
    freshly created dataset stored in ``self._input_dataset``.

    Args:
        input_list: Input items, one per model input. Its length must equal
            ``self._input_num``.

    Returns:
        ``const.FAILED`` when the input count is wrong, an item cannot be
        parsed, or a buffer cannot be added. Otherwise a success status
        (``const.SUCCESS``, or the falsy status of the last
        ``acl.mdl.add_dataset_buffer`` call). When the loop fails, the
        dataset is released through ``self._release_dataset`` and
        ``self._input_dataset`` is reset to ``None``.
    """
    ret = const.SUCCESS
    if len(input_list) != self._input_num:
        log_error("Current input data num %d unequal to model "
                  "input num %d" % (len(input_list), self._input_num))
        return const.FAILED

    self._input_dataset = acl.mdl.create_dataset()
    for i in range(self._input_num):
        item = input_list[i]
        data, size = self._parse_input_data(item, i)
        if (data is None) or (size == 0):
            # Stop at the first unusable input; the remaining ones are skipped.
            ret = const.FAILED
            log_error("The %d input is invalid" % (i))
            break

        # A size mismatch is only warned about; the buffer is still added.
        model_size = acl.mdl.get_input_size_by_index(self._model_desc, i)
        if size != model_size:
            log_warning(" Input[%d] size: %d not equal om size: %d"
                        % (i, size, model_size)
                        + ", may cause inference result error, please check model input")

        dataset_buffer = acl.create_data_buffer(data, size)
        _, ret = acl.mdl.add_dataset_buffer(self._input_dataset,
                                            dataset_buffer)
        if ret:
            log_error("Add input dataset buffer failed")
            # Destroy the buffer that could not be attached. The dataset
            # itself is released below; it must not be handed to
            # destroy_data_buffer, which expects a data-buffer handle.
            acl.destroy_data_buffer(dataset_buffer)
            ret = const.FAILED
            break

    if ret == const.FAILED:
        self._release_dataset(self._input_dataset)
        self._input_dataset = None
    return ret
def init_resource_arg_max(self):
    """Prepare the attribute, tensor descriptors and output buffer for ArgMaxD.

    Stores on ``self``: the operator type name, an operator attribute with
    ``"dimension"`` set to 0, an input descriptor (ACL_FLOAT16) and an output
    descriptor (ACL_INT32) built from one-element shape lists, the output
    size, the device memory allocated for the output and the data buffer
    wrapping that memory. Return codes are passed to ``check_ret``, which is
    defined outside this block.
    """
    self.op_type_name = "ArgMaxD"

    # Operator attribute: dimension = 0.
    attr = acl.op.create_attr()
    self.op_attr = attr
    status = acl.op.set_attr_int(attr, "dimension", 0)
    check_ret("acl.op.set_attr_int", status)

    # Tensor descriptors for the operator's input and output.
    in_desc = acl.create_tensor_desc(
        ACL_FLOAT16, [self.input_shape], ACL_FORMAT_ND)
    self.input_desc_arg_max_d = in_desc
    out_desc = acl.create_tensor_desc(
        ACL_INT32, [self.output_shape], ACL_FORMAT_ND)
    self.output_desc_arg_max_d = out_desc

    # Device memory sized from the output descriptor, wrapped in a data buffer.
    out_size = acl.get_tensor_desc_size(out_desc)
    self.tensor_size_arg_max_d = out_size
    dev_mem, status = acl.rt.malloc(out_size, ACL_MEM_MALLOC_NORMAL_ONLY)
    self.dev_buffer_arg_max_d = dev_mem
    check_ret("acl.rt.malloc", status)
    self.output_buffer_arg_max_d = acl.create_data_buffer(dev_mem, out_size)
def _gen_input_dataset(self, input_list):
    """Organize the input data into the dataset structure used for inference.

    Args:
        input_list: Input items, one per model input. Its length must equal
            ``self._input_num``.

    Returns:
        ``FAILED`` when the input count is wrong, an item cannot be parsed,
        or a buffer cannot be added. Otherwise a success status (``SUCCESS``,
        or the falsy status of the last ``acl.mdl.add_dataset_buffer``
        call). When the loop fails, ``self.input_dataset`` is released
        through ``self._release_dataset``.
    """
    ret = SUCCESS
    # Return immediately if the number of inputs does not match the model.
    if len(input_list) != self._input_num:
        print("Current input data num %d unequal to"
              " model input num %d" % (len(input_list), self._input_num))
        return FAILED

    self.input_dataset = acl.mdl.create_dataset()
    for i in range(self._input_num):
        item = input_list[i]
        # Parse the input; AclImage, ACL pointer and numpy array inputs are
        # currently supported.
        data, size = self._parse_input_data(item, i)
        if (data is None) or (size == 0):
            # Do not parse the remaining inputs once one of them fails.
            ret = FAILED
            print("The %d input is invalid" % (i))
            break

        # Create the dataset buffer and fill it with the input data.
        dataset_buffer = acl.create_data_buffer(data, size)
        # Add the dataset buffer to the dataset.
        _, ret = acl.mdl.add_dataset_buffer(self.input_dataset,
                                            dataset_buffer)
        if ret:
            print("Add input dataset buffer failed")
            # Destroy the buffer that could not be attached. The dataset
            # itself is released below; it must not be handed to
            # destroy_data_buffer, which expects a data-buffer handle.
            acl.destroy_data_buffer(dataset_buffer)
            ret = FAILED
            break

    if ret == FAILED:
        # Release the dataset on failure.
        self._release_dataset(self.input_dataset)
    return ret
def _gen_input_dataset(self, input_list):
    """Organize the input data into the dataset structure used for inference.

    Args:
        input_list: Input items, one per model input. Its length must equal
            ``self._input_num``.

    Returns:
        ``FAILED`` when the input count is wrong, an item cannot be parsed,
        or a buffer cannot be added. Otherwise a success status (``SUCCESS``,
        or the falsy status of the last ``acl.mdl.add_dataset_buffer``
        call). When the loop fails, ``self.input_dataset`` is released
        through ``self._release_dataset``.
    """
    ret = SUCCESS
    # Return if the input number does not match the model requirements.
    if len(input_list) != self._input_num:
        print("Current input data num %d unequal to"
              " model input num %d" % (len(input_list), self._input_num))
        return FAILED

    self.input_dataset = acl.mdl.create_dataset()
    for i in range(self._input_num):
        item = input_list[i]
        # Parse the input; AclImage, ACL pointer and numpy array inputs are
        # currently supported.
        data, size = self._parse_input_data(item, i)
        if (data is None) or (size == 0):
            # Do not parse the remaining inputs once one of them fails.
            ret = FAILED
            print("The %d input is invalid" % (i))
            break

        # Create the input dataset buffer and fill in the input data.
        dataset_buffer = acl.create_data_buffer(data, size)
        # Add the dataset buffer to the dataset.
        _, ret = acl.mdl.add_dataset_buffer(self.input_dataset,
                                            dataset_buffer)
        if ret:
            print("Add input dataset buffer failed")
            # Destroy the buffer that could not be attached. The dataset
            # itself is released below; it must not be handed to
            # destroy_data_buffer, which expects a data-buffer handle.
            acl.destroy_data_buffer(dataset_buffer)
            ret = FAILED
            break

    if ret == FAILED:
        # Release the dataset if anything failed.
        self._release_dataset(self.input_dataset)
    return ret
def _gen_dataset(self, type_str="input"):
    """Create an ACL dataset from prepared buffers and store it on ``self``.

    Args:
        type_str(str): Selects which dataset to build. Only the exact value
            ``"in"`` builds the input dataset (from ``self.input_data``,
            stored in ``self.load_input_dataset``). Any other value,
            including the default ``"input"``, builds the output dataset
            (from ``self.output_data``, stored in
            ``self.load_output_dataset``).

    Raises:
        RuntimeError: If a data buffer cannot be created for an item.
    """
    dataset = acl.mdl.create_dataset()
    if type_str == "in":
        self.load_input_dataset = dataset
        temp_dataset = self.input_data
    else:
        self.load_output_dataset = dataset
        temp_dataset = self.output_data

    for item in temp_dataset:
        data = acl.create_data_buffer(item["buffer"], item["size"])
        if data is None:
            # There is no buffer to destroy here, and a missing buffer must
            # not be added to the dataset.
            raise RuntimeError(
                "can't create data buffer, create dataset failed")

        _, ret = acl.mdl.add_dataset_buffer(dataset, data)
        if ret != ACL_ERROR_NONE:
            # Destroy the buffer that could not be attached (not the
            # dataset), then report the add failure itself. check_ret is
            # defined outside this block; presumably it raises on a non-zero
            # code - confirm.
            destroy_ret = acl.destroy_data_buffer(data)
            check_ret("acl.destroy_data_buffer", destroy_ret)
            check_ret("acl.mdl.add_dataset_buffer", ret)
def _load_input_data(self, images_data):
    """Copy image data to the device and wrap it in a one-buffer dataset.

    Args:
        images_data: Array-like object exposing ``tobytes()``, ``size`` and
            ``itemsize`` (presumably a numpy array - confirm with callers).

    Returns:
        The dataset created by ``acl.mdl.create_dataset`` holding one data
        buffer that points at the device copy of ``images_data``.

    Raises:
        Exception: If the data buffer cannot be created.
    """
    # Obtain a host pointer, using bytes_to_ptr when acl.util provides it.
    if "bytes_to_ptr" in dir(acl.util):
        bytes_data = images_data.tobytes()
        img_ptr = acl.util.bytes_to_ptr(bytes_data)
    else:
        img_ptr = acl.util.numpy_to_ptr(images_data)

    # Copy host memory to freshly allocated device memory.
    image_buffer_size = images_data.size * images_data.itemsize
    img_device, ret = acl.rt.malloc(image_buffer_size,
                                    ACL_MEM_MALLOC_NORMAL_ONLY)
    check_ret("acl.rt.malloc", ret)
    ret = acl.rt.memcpy(img_device, image_buffer_size, img_ptr,
                        image_buffer_size, ACL_MEMCPY_HOST_TO_DEVICE)
    check_ret("acl.rt.memcpy", ret)

    # Create the dataset on top of the device memory.
    img_dataset = acl.mdl.create_dataset()
    img_data_buffer = acl.create_data_buffer(img_device, image_buffer_size)
    if img_data_buffer is None:
        raise Exception("can't create data buffer, create input failed!!!")

    _, ret = acl.mdl.add_dataset_buffer(img_dataset, img_data_buffer)
    if ret != ACL_SUCCESS:
        # Clean up the unattached buffer, then report the add failure itself
        # so an incomplete dataset is not silently returned. check_ret is
        # defined outside this block; presumably it raises on a non-zero
        # code - confirm.
        destroy_ret = acl.destroy_data_buffer(img_data_buffer)
        check_ret("acl.destroy_data_buffer", destroy_ret)
        check_ret("acl.mdl.add_dataset_buffer", ret)
    return img_dataset
def _gen_input_dataset(self, input_list):
    """Build the model input dataset from a list of input items.

    Each item is converted by ``self._parse_input_data`` into a
    ``(data, size)`` pair, wrapped in an ACL data buffer and appended to a
    freshly created dataset stored in ``self._input_dataset``.

    Args:
        input_list: Input items, one per model input. Its length must equal
            ``self._input_num``.

    Returns:
        ``const.FAILED`` when the input count is wrong, an item cannot be
        parsed, or a buffer cannot be added. Otherwise a success status
        (``const.SUCCESS``, or the falsy status of the last
        ``acl.mdl.add_dataset_buffer`` call). When the loop fails, the
        dataset is released through ``self._release_dataset`` and
        ``self._input_dataset`` is reset to ``None``.
    """
    ret = const.SUCCESS
    if len(input_list) != self._input_num:
        log_error("Current input data num %d unequal to model "
                  "input num %d" % (len(input_list), self._input_num))
        return const.FAILED

    self._input_dataset = acl.mdl.create_dataset()
    for i in range(self._input_num):
        item = input_list[i]
        data, size = self._parse_input_data(item, i)
        if (data is None) or (size == 0):
            # Stop at the first unusable input; the remaining ones are skipped.
            ret = const.FAILED
            log_error("The %d input is invalid" % (i))
            break

        dataset_buffer = acl.create_data_buffer(data, size)
        _, ret = acl.mdl.add_dataset_buffer(self._input_dataset,
                                            dataset_buffer)
        if ret:
            log_error("Add input dataset buffer failed")
            # Destroy the buffer that could not be attached. The dataset
            # itself is released below; it must not be handed to
            # destroy_data_buffer, which expects a data-buffer handle.
            acl.destroy_data_buffer(dataset_buffer)
            ret = const.FAILED
            break

    if ret == const.FAILED:
        self._release_dataset(self._input_dataset)
        self._input_dataset = None
    return ret
def _gen_input_dataset(self, data, data_size):
    """Create ``self.input_dataset`` holding a single input data buffer.

    Args:
        data: Memory handle passed to ``acl.create_data_buffer``.
        data_size: Size passed alongside ``data``.
    """
    self.input_dataset = acl.mdl.create_dataset()
    input_dataset_buffer = acl.create_data_buffer(data, data_size)
    _, ret = acl.mdl.add_dataset_buffer(self.input_dataset,
                                        input_dataset_buffer)
    if ret:
        # Destroy the buffer that could not be attached (not the dataset),
        # then report the add failure itself. check_ret is defined outside
        # this block; presumably it raises on a non-zero code - confirm.
        destroy_ret = acl.destroy_data_buffer(input_dataset_buffer)
        check_ret("acl.destroy_data_buffer", destroy_ret)
        check_ret("acl.mdl.add_dataset_buffer", ret)
def forward_op_batch_matmul(data, stream):
    """Execute the single operator named by ``OP_TYPE`` on two inputs.

    Sets the operator model directory, builds an attribute with ``adj_x1``
    and ``adj_x2`` both False, describes the tensors as ACL_FLOAT / NCHW
    with shapes [1, 1, 1024, 1024] and [1, 1, 1024, 27648] for the inputs
    and [1, 1, 1024, 27648] for the output, runs the operator on ``stream``
    and waits for the stream to finish.

    Args:
        data: Indexable holding the two inputs; ``data[0]`` and ``data[1]``
            are handed to ``create_input``.
        stream: Stream handle used for execution and synchronization.

    Returns:
        Whatever ``get_forward_result`` returns for the output device memory
        and its size.
    """
    status = acl.op.set_model_dir(MODEL_MATMUL_PATH)
    check_ret("acl.op.set_model_dir", status)

    # Both adjoint flags are switched off.
    op_attr = acl.op.create_attr()
    for flag_name in ("adj_x1", "adj_x2"):
        status = acl.op.set_attr_bool(op_attr, flag_name, False)
        check_ret("acl.op.set_attr_bool", status)

    # Tensor descriptors, then their sizes.
    desc_x1 = acl.create_tensor_desc(
        ACL_FLOAT, [1, 1, 1024, 1024], ACL_FORMAT_NCHW)
    desc_x2 = acl.create_tensor_desc(
        ACL_FLOAT, [1, 1, 1024, 27648], ACL_FORMAT_NCHW)
    desc_y = acl.create_tensor_desc(
        ACL_FLOAT, [1, 1, 1024, 27648], ACL_FORMAT_NCHW)
    size_x1 = acl.get_tensor_desc_size(desc_x1)
    size_x2 = acl.get_tensor_desc_size(desc_x2)
    size_y = acl.get_tensor_desc_size(desc_y)

    # Input buffers from the caller's data, output buffer from new memory.
    buffer_x1 = create_input(data[0], size_x1)
    buffer_x2 = create_input(data[1], size_x2)
    dev_out, status = acl.rt.malloc(size_y, ACL_MEM_MALLOC_NORMAL_ONLY)
    check_ret("acl.rt.malloc", status)
    buffer_y = acl.create_data_buffer(dev_out, size_y)

    status = acl.op.execute_v2(
        OP_TYPE,
        [desc_x1, desc_x2],
        [buffer_x1, buffer_x2],
        [desc_y],
        [buffer_y],
        op_attr,
        stream)
    check_ret("acl.op.execute_v2", status)

    status = acl.rt.synchronize_stream(stream)
    check_ret("acl.rt.synchronize_stream", status)
    print("[SingleOp] batch_matmul run success")
    return get_forward_result(dev_out, size_y)
def _gen_input_dataset(self, dvpp_output_buffer, dvpp_output_size):
    """Create ``self.input_dataset`` holding a single input data buffer.

    Args:
        dvpp_output_buffer: Memory handle passed to
            ``acl.create_data_buffer``.
        dvpp_output_size: Size passed alongside ``dvpp_output_buffer``.
    """
    print("[Model] create model input dataset:")
    self.input_dataset = acl.mdl.create_dataset()
    input_dataset_buffer = acl.create_data_buffer(dvpp_output_buffer,
                                                  dvpp_output_size)
    _, ret = acl.mdl.add_dataset_buffer(self.input_dataset,
                                        input_dataset_buffer)
    if ret:
        # Clean up the unattached buffer, then report the add failure itself
        # so it is not masked by a successful destroy. check_ret is defined
        # outside this block; presumably it raises on a non-zero code -
        # confirm.
        destroy_ret = acl.destroy_data_buffer(input_dataset_buffer)
        check_ret("acl.destroy_data_buffer", destroy_ret)
        check_ret("acl.mdl.add_dataset_buffer", ret)
    print("[Model] create model input dataset success")
def _gen_input_dataset(self, input_buffer, input_size):
    """Create the input dataset for the inference model.

    The dataset is stored in ``self.input_dataset`` and holds one data
    buffer.

    Args:
        input_buffer: The memory holding the input data on device.
        input_size: The size of the device memory holding the input data.
    """
    self.input_dataset = acl.mdl.create_dataset()
    input_dataset_buffer = acl.create_data_buffer(input_buffer, input_size)
    _, ret = acl.mdl.add_dataset_buffer(self.input_dataset,
                                        input_dataset_buffer)
    if ret:
        # Clean up the unattached buffer, then report the add failure itself
        # so it is not masked by a successful destroy. check_ret is defined
        # outside this block; presumably it raises on a non-zero code -
        # confirm.
        destroy_ret = acl.destroy_data_buffer(input_dataset_buffer)
        check_ret("acl.destroy_data_buffer", destroy_ret)
        check_ret("acl.mdl.add_dataset_buffer", ret)
def _gen_output_dataset(self, size):
    """Allocate device memory for each model output and build the dataset.

    The finished dataset is stored in ``self.output_dataset``.

    Args:
        size: Number of outputs to allocate. Indices ``0 .. size - 1`` are
            passed to ``acl.mdl.get_output_size_by_index``.
    """
    print("[Model] create model output dataset:")
    dataset = acl.mdl.create_dataset()
    for i in range(size):
        # Named distinctly so the loop no longer shadows the ``size`` argument.
        buffer_size = acl.mdl.get_output_size_by_index(self.model_desc, i)
        dev_buffer, ret = acl.rt.malloc(buffer_size,
                                        ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)

        dataset_buffer = acl.create_data_buffer(dev_buffer, buffer_size)
        _, ret = acl.mdl.add_dataset_buffer(dataset, dataset_buffer)
        if ret:
            acl.rt.free(dev_buffer)
            # Destroy the unattached data buffer; the dataset handle is not
            # a data buffer and must not be passed here.
            acl.destroy_data_buffer(dataset_buffer)
            # ``ret`` still holds the add status, so report it under the
            # name of the call that actually failed.
            check_ret("acl.mdl.add_dataset_buffer", ret)
    self.output_dataset = dataset
    print("[Model] create model output dataset success")
def _gen_output_tensor(self):
    """Create the output tensor descriptor and its device and host memory.

    Stores the descriptor in ``self.operator_output`` and appends the device
    pointer, its data buffer, a host allocation of the same size and the
    descriptor to ``self.device_outputs``, ``self.device_buffer_outputs``,
    ``self.host_outputs`` and ``self.output_desc`` respectively.
    """
    print("gen output data stage:")
    self.operator_output = acl.create_tensor_desc(
        acl_dtype[self.data_type], self.shape, self.format_type)
    for factor in [self.operator_output]:
        factor_size = acl.get_tensor_desc_size(factor)
        factor_device, ret = acl.rt.malloc(factor_size,
                                           ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)
        self.device_outputs.append(factor_device)
        self.device_buffer_outputs.append(
            acl.create_data_buffer(factor_device, factor_size))

        # The host allocation status used to be discarded; check it like the
        # device allocation above so a failed allocation is not recorded.
        factor_host, ret = acl.rt.malloc_host(factor_size)
        check_ret("acl.rt.malloc_host", ret)
        self.host_outputs.append(factor_host)

        self.output_desc.append(factor)
    print("gen output data success")
def init_resource_cast(self):
    """Prepare tensor descriptors and the output buffer for the cast operator.

    Both descriptors use a one-element shape list built from
    ``self.input_shape``: the input is ACL_FLOAT and the output ACL_FLOAT16.
    Device memory sized from the output descriptor is stored in
    ``self.dev_buffer_cast`` and wrapped in ``self.output_buffer_cast``.
    """
    # Settings of the cast operator.
    src_desc = acl.create_tensor_desc(
        ACL_FLOAT, [self.input_shape], ACL_FORMAT_ND)
    self._input_desc = src_desc
    dst_desc = acl.create_tensor_desc(
        ACL_FLOAT16, [self.input_shape], ACL_FORMAT_ND)
    self._output_desc = dst_desc

    out_bytes = acl.get_tensor_desc_size(dst_desc)
    dev_mem, status = acl.rt.malloc(out_bytes, ACL_MEM_MALLOC_NORMAL_ONLY)
    self.dev_buffer_cast = dev_mem
    check_ret("acl.rt.malloc", status)
    self.output_buffer_cast = acl.create_data_buffer(dev_mem, out_bytes)
def create_input(np_data, size):
    """Copy host data to new device memory and wrap it in a data buffer.

    Args:
        np_data: Host data exposing ``tobytes()`` (presumably a numpy
            array - confirm with callers).
        size: Number of bytes to allocate on the device and to copy.

    Returns:
        The data buffer created by ``acl.create_data_buffer`` over the
        device memory.
    """
    if "bytes_to_ptr" in dir(acl.util):
        # bytes_data stays referenced until the copy below has completed.
        bytes_data = np_data.tobytes()
        ptr = acl.util.bytes_to_ptr(bytes_data)
    else:
        # The second return value is kept referenced until the copy is done;
        # presumably ``ptr`` points into it - confirm.
        ptr, _contiguous_data = acl.util.numpy_contiguous_to_ptr(np_data)

    dev_ptr, ret = acl.rt.malloc(size, ACL_MEM_MALLOC_HUGE_FIRST)
    # The allocation status used to be overwritten by the memcpy status
    # without being checked.
    check_ret("acl.rt.malloc", ret)
    ret = acl.rt.memcpy(dev_ptr, size, ptr, size, ACL_MEMCPY_HOST_TO_DEVICE)
    check_ret("acl.rt.memcpy", ret)
    return acl.create_data_buffer(dev_ptr, size)
def _load_output_data(self):
    """Allocate device memory for every model output and build a dataset.

    Iterates over ``self.output_num`` outputs, sizing each allocation with
    ``acl.mdl.get_output_size_by_index``.

    Returns:
        The dataset created by ``acl.mdl.create_dataset`` with one data
        buffer per output.
    """
    output_data = acl.mdl.create_dataset()
    for i in range(self.output_num):
        temp_buffer_size = acl.mdl.get_output_size_by_index(
            self.model_desc, i)
        temp_buffer, ret = acl.rt.malloc(temp_buffer_size,
                                         ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)

        data_buf = acl.create_data_buffer(temp_buffer, temp_buffer_size)
        _, ret = acl.mdl.add_dataset_buffer(output_data, data_buf)
        if ret != ACL_SUCCESS:
            # Clean up the unattached buffer, then report the add failure
            # itself so an incomplete dataset is not silently returned.
            # check_ret is defined outside this block; presumably it raises
            # on a non-zero code - confirm.
            destroy_ret = acl.destroy_data_buffer(data_buf)
            check_ret("acl.destroy_data_buffer", destroy_ret)
            check_ret("acl.mdl.add_dataset_buffer", ret)
    return output_data
def _gen_output_dataset(self, ouput_num):
    """Allocate device memory for each model output and build the dataset.

    The finished dataset is stored in ``self._output_dataset``.

    Args:
        ouput_num: Number of outputs to allocate. Indices
            ``0 .. ouput_num - 1`` are passed to
            ``acl.mdl.get_output_size_by_index``.
    """
    log_info("[Model] create model output dataset:")
    dataset = acl.mdl.create_dataset()
    for i in range(ouput_num):
        # Allocate device memory for the output.
        size = acl.mdl.get_output_size_by_index(self._model_desc, i)
        buf, ret = acl.rt.malloc(size, const.ACL_MEM_MALLOC_NORMAL_ONLY)
        utils.check_ret("acl.rt.malloc", ret)

        # Create the output data buffer and attach it to the dataset.
        dataset_buffer = acl.create_data_buffer(buf, size)
        _, ret = acl.mdl.add_dataset_buffer(dataset, dataset_buffer)
        log_info("malloc output %d, size %d" % (i, size))
        if ret:
            acl.rt.free(buf)
            acl.destroy_data_buffer(dataset_buffer)
            # ``ret`` still holds the add status, so report it under the
            # name of the call that actually failed.
            utils.check_ret("acl.mdl.add_dataset_buffer", ret)
    self._output_dataset = dataset
    log_info("Create model output dataset success")
def _gen_dataset(self, type_str="input"):
    """Create an ACL dataset from prepared buffers and store it on ``self``.

    Args:
        type_str: Selects which dataset to build. Only the exact value
            ``"in"`` builds the input dataset (from ``self.input_data``,
            stored in ``self.load_input_dataset``). Any other value,
            including the default ``"input"``, builds the output dataset
            (from ``self.output_data``, stored in
            ``self.load_output_dataset``).
    """
    dataset = acl.mdl.create_dataset()
    if type_str == "in":
        self.load_input_dataset = dataset
        temp_dataset = self.input_data
    else:
        self.load_output_dataset = dataset
        temp_dataset = self.output_data

    for item in temp_dataset:
        data = acl.create_data_buffer(item["buffer"], item["size"])
        _, ret = acl.mdl.add_dataset_buffer(dataset, data)
        if ret != ACL_SUCCESS:
            # Clean up the unattached buffer, then report the add failure
            # itself so it is not masked by a successful destroy. check_ret
            # is defined outside this block; presumably it raises on a
            # non-zero code - confirm.
            destroy_ret = acl.destroy_data_buffer(data)
            check_ret("acl.destroy_data_buffer", destroy_ret)
            check_ret("acl.mdl.add_dataset_buffer", ret)
def _gen_output_dataset(self, size):
    """Create the output dataset and store it in ``self.output_data``.

    Args:
        size: Number of outputs to allocate. Indices ``0 .. size - 1`` are
            passed to ``acl.mdl.get_output_size_by_index``, so this is used
            as an output count.
    """
    dataset = acl.mdl.create_dataset()
    for i in range(size):
        temp_buffer_size = acl.mdl.get_output_size_by_index(
            self.model_desc, i)
        temp_buffer, ret = acl.rt.malloc(temp_buffer_size,
                                         ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)

        dataset_buffer = acl.create_data_buffer(temp_buffer,
                                                temp_buffer_size)
        _, ret = acl.mdl.add_dataset_buffer(dataset, dataset_buffer)
        if ret:
            # Clean up the unattached buffer, then report the add failure
            # itself so it is not masked by a successful destroy. check_ret
            # is defined outside this block; presumably it raises on a
            # non-zero code - confirm.
            destroy_ret = acl.destroy_data_buffer(dataset_buffer)
            check_ret("acl.destroy_data_buffer", destroy_ret)
            check_ret("acl.mdl.add_dataset_buffer", ret)
    self.output_data = dataset
def _gen_output_dataset(self, size):
    """Allocate device memory for each model output and build the dataset.

    The finished dataset is stored in ``self.output_dataset``.

    Args:
        size: Number of outputs to allocate. Indices ``0 .. size - 1`` are
            passed to ``acl.mdl.get_output_size_by_index``.
    """
    dataset = acl.mdl.create_dataset()
    for i in range(size):
        # Allocate device memory for each output. Named distinctly so the
        # loop no longer shadows the ``size`` argument.
        buffer_size = acl.mdl.get_output_size_by_index(self.model_desc, i)
        dev_buffer, ret = acl.rt.malloc(buffer_size,
                                        ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)

        # Create the output data buffer over the allocated memory and add it
        # to the output dataset.
        dataset_buffer = acl.create_data_buffer(dev_buffer, buffer_size)
        _, ret = acl.mdl.add_dataset_buffer(dataset, dataset_buffer)
        if ret:
            # Release resources if the add failed.
            acl.rt.free(dev_buffer)
            # Destroy the unattached data buffer; the dataset handle is not
            # a data buffer and must not be passed here.
            acl.destroy_data_buffer(dataset_buffer)
            # ``ret`` still holds the add status, so report it under the
            # name of the call that actually failed.
            check_ret("acl.mdl.add_dataset_buffer", ret)
    self.output_dataset = dataset
def _gen_output_dataset(self, size):
    """Create the output dataset and store it in ``self.output_data``.

    Every device allocation is also appended to ``self.device_outputs``.

    Args:
        size: Number of outputs to allocate. Indices ``0 .. size - 1`` are
            passed to ``acl.mdl.get_output_size_by_index``.
    """
    print("[Model] create model output dataset:")
    dataset = acl.mdl.create_dataset()
    for i in range(size):
        temp_buffer_size = acl.mdl.get_output_size_by_index(
            self.model_desc, i)
        temp_buffer, ret = acl.rt.malloc(temp_buffer_size,
                                         ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)
        # Recorded before the buffer is attached to the dataset.
        self.device_outputs.append(temp_buffer)

        dataset_buffer = acl.create_data_buffer(temp_buffer,
                                                temp_buffer_size)
        _, ret = acl.mdl.add_dataset_buffer(dataset, dataset_buffer)
        if ret:
            # Clean up the unattached buffer, then report the add failure
            # itself so it is not masked by a successful destroy. check_ret
            # is defined outside this block; presumably it raises on a
            # non-zero code - confirm.
            destroy_ret = acl.destroy_data_buffer(dataset_buffer)
            check_ret("acl.destroy_data_buffer", destroy_ret)
            check_ret("acl.mdl.add_dataset_buffer", ret)
    self.output_data = dataset
    print("[Model] create model output dataset success")
def _gen_output_dataset(self, size):
    """Allocate device memory for each model output and build the dataset.

    The finished dataset is stored in ``self.output_dataset``.

    Args:
        size: Number of outputs to allocate. Indices ``0 .. size - 1`` are
            passed to ``acl.mdl.get_output_size_by_index``.
    """
    print("[Model] create model output dataset:")
    dataset = acl.mdl.create_dataset()
    for i in range(size):
        # Allocate device memory for each output. Named distinctly so the
        # loop no longer shadows the ``size`` argument.
        buffer_size = acl.mdl.get_output_size_by_index(self.model_desc, i)
        dev_buffer, ret = acl.rt.malloc(buffer_size,
                                        ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)

        # Create the output data buffer over the allocated memory.
        dataset_buffer = acl.create_data_buffer(dev_buffer, buffer_size)
        # Add the data buffer to the output dataset.
        _, ret = acl.mdl.add_dataset_buffer(dataset, dataset_buffer)
        print("malloc output %d, size %d" % (i, buffer_size))
        if ret:
            # Release resources if the add failed.
            acl.rt.free(dev_buffer)
            # Destroy the unattached data buffer; the dataset handle is not
            # a data buffer and must not be passed here.
            acl.destroy_data_buffer(dataset_buffer)
            # ``ret`` still holds the add status, so report it under the
            # name of the call that actually failed.
            check_ret("acl.mdl.add_dataset_buffer", ret)
    self.output_dataset = dataset
    print("[Model] create model output dataset success")
def _gen_output_dataset(self, size):
    """Allocate device memory for each model output and build the dataset.

    The finished dataset is stored in ``self.output_dataset``.

    Args:
        size: Number of outputs to allocate. Indices ``0 .. size - 1`` are
            passed to ``acl.mdl.get_output_size_by_index``.
    """
    print("[Model] create model output dataset:")
    dataset = acl.mdl.create_dataset()
    for i in range(size):
        # Create the output memory.
        temp_buffer_size = acl.mdl.get_output_size_by_index(
            self.model_desc, i)
        temp_buffer, ret = acl.rt.malloc(temp_buffer_size,
                                         ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", ret)

        # Create the output data buffer and add it to the output dataset.
        dataset_buffer = acl.create_data_buffer(temp_buffer,
                                                temp_buffer_size)
        _, ret = acl.mdl.add_dataset_buffer(dataset, dataset_buffer)
        if ret:
            # Free resources if the add failed.
            acl.rt.free(temp_buffer)
            # Destroy the unattached data buffer; the dataset handle is not
            # a data buffer and must not be passed here.
            acl.destroy_data_buffer(dataset_buffer)
            # ``ret`` still holds the add status, so report it under the
            # name of the call that actually failed.
            check_ret("acl.mdl.add_dataset_buffer", ret)
    self.output_dataset = dataset
    print("[Model] create model output dataset success")
def _gen_input_tensor(self):
    """Upload both operand arrays to the device and record their handles.

    For ``self.factor_a`` and then ``self.factor_b``: a tensor descriptor is
    created from ``self.data_type``, ``self.shape`` and ``self.format_type``,
    device memory of the descriptor's size is allocated, the host data is
    copied into it and a data buffer is created over it. The device pointer,
    data buffer and descriptor are appended to ``self._inputs_device``,
    ``self._inputs_device_buffer`` and ``self._inputs_desc``.
    """
    print("gen input data stage:")
    for operand in (self.factor_a, self.factor_b):
        desc = acl.create_tensor_desc(
            acl_dtype[self.data_type], self.shape, self.format_type)
        nbytes = acl.get_tensor_desc_size(desc)

        device_mem, status = acl.rt.malloc(nbytes, ACL_MEM_MALLOC_NORMAL_ONLY)
        check_ret("acl.rt.malloc", status)

        # Obtain a host pointer, using bytes_to_ptr when acl.util provides it.
        if "bytes_to_ptr" not in dir(acl.util):
            host_ptr = acl.util.numpy_to_ptr(operand)
        else:
            # raw_bytes stays referenced until the copy below has completed.
            raw_bytes = operand.tobytes()
            host_ptr = acl.util.bytes_to_ptr(raw_bytes)

        status = acl.rt.memcpy(
            device_mem, nbytes, host_ptr, nbytes, ACL_MEMCPY_HOST_TO_DEVICE)
        check_ret("acl.rt.memcpy", status)

        data_buffer = acl.create_data_buffer(device_mem, nbytes)
        self._inputs_device.append(device_mem)
        self._inputs_device_buffer.append(data_buffer)
        self._inputs_desc.append(desc)
    print("gen input data success")