class Yolov3(object): def __init__(self, acl_resource, model_width, model_height): self._acl_resource = acl_resource self._model_width = model_width self._model_height = model_height #使用dvpp处理图像,当使用opencv或者PIL时则不需要创建dvpp实例 self._dvpp = Dvpp(acl_resource) #创建yolov3网络的图像信息输入数据 self._image_info = np.array([model_width, model_height, model_width, model_height], dtype=np.float32) def __del__(self): if self._dvpp: del self._dvpp print("Release yolov3 resource finished") def pre_process(self, image): #使用dvpp将图像缩放到模型要求大小 resized_image = self._dvpp.resize(image, self._model_width, self._model_height) #输出缩放后的图像和图像信息作为推理输入数据 return [resized_image, self._image_info] def post_process(self, infer_output, origin_img): #解析推理输出数据 detection_result_list = self._analyze_inference_output(infer_output, origin_img) #将yuv图像转换为jpeg图像 jpeg_image = self._dvpp.jpege(origin_img) return jpeg_image, detection_result_list def _analyze_inference_output(self, infer_output, origin_img): #yolov3网络有两个输出,第二个(下标1)输出为框的个数 box_num = int(infer_output[1][0, 0]) #第一个(下标0)输出为框信息 box_info = infer_output[0] #输出的框信息是在mode_width*model_height大小的图片上的坐标 #需要转换到原始图片上的坐标 scalex = origin_img.width / self._model_width scaley = origin_img.height / self._model_height detection_result_list = [] for i in range(box_num): #检测到的物体类别编号 id = int(box_info[0, LABEL * box_num + i]) if id >= len(labels): print("class id %d out of range" % (id)) continue detection_item = presenter_datatype.ObjectDetectionResult() detection_item.object_class = id #检测到的物体置信度 detection_item.confidence = box_info[0, SCORE * box_num + i] #物体位置框坐标 detection_item.box.lt.x = int(box_info[0, TOP_LEFT_X * box_num + i] * scalex) detection_item.box.lt.y = int(box_info[0, TOP_LEFT_Y * box_num + i] * scaley) detection_item.box.rb.x = int(box_info[0, BOTTOM_RIGHT_X * box_num + i] * scalex) detection_item.box.rb.y = int(box_info[0, BOTTOM_RIGHT_Y * box_num + i] * scaley) #将置信度和类别名称组织为字符串 if labels == []: detection_item.result_text = str(detection_item.object_class) + " " + str( round(detection_item.confidence * 100, 2)) + "%" else: detection_item.result_text = str(labels[detection_item.object_class]) + " " + str( round(detection_item.confidence * 100, 2)) + "%" detection_result_list.append(detection_item) return detection_result_list
class VggSsd(object): def __init__(self, acl_resource, model_width, model_height): self._acl_resource = acl_resource self._model_width = model_width self._model_height = model_height #使用dvpp处理图像,当使用opencv或者PIL时则不需要创建dvpp实例 self._dvpp = Dvpp(acl_resource) def __del__(self): print("Release yolov3 resource finished") def pre_process(self, image): #使用dvpp将图像缩放到模型要求大小 resized_image = self._dvpp.resize(image, self._model_width, self._model_height) if resized_image == None: print("Resize image failed") return None #输出缩放后的图像和图像信息作为推理输入数据 return [ resized_image, ] # img_h = image.size[1] # img_w = image.size[0] # net_h = MODEL_HEIGHT # net_w = MODEL_WIDTH # scale = min(float(net_w) / float(img_w), float(net_h) / float(img_h)) # new_w = int(img_w * scale) # new_h = int(img_h * scale) # shift_x = (net_w - new_w) // 2 # shift_y = (net_h - new_h) // 2 # shift_x_ratio = (net_w - new_w) / 2.0 / net_w # shift_y_ratio = (net_h - new_h) / 2.0 / net_h # image_ = image.resize( (new_w, new_h)) # new_image = np.zeros((net_h, net_w, 3), np.uint8) # new_image[shift_y: new_h + shift_y, shift_x: new_w + shift_x, :] = np.array(image_) # new_image = new_image.astype(np.float32) # new_image = new_image / 255 # return new_image def post_process(self, infer_output, origin_img): #解析推理输出数据 detection_result_list = self._analyze_inference_output( infer_output, origin_img) #将yuv图像转换为jpeg图像 jpeg_image = self._dvpp.jpege(origin_img) return jpeg_image, detection_result_list def overlap(self, x1, x2, x3, x4): left = max(x1, x3) right = min(x2, x4) return right - left def cal_iou(self, box, truth): w = self.overlap(box[0], box[2], truth[0], truth[2]) h = self.overlap(box[1], box[3], truth[1], truth[3]) if w <= 0 or h <= 0: return 0 inter_area = w * h union_area = (box[2] - box[0]) * (box[3] - box[1]) + ( truth[2] - truth[0]) * (truth[3] - truth[1]) - inter_area return inter_area * 1.0 / union_area def apply_nms(self, all_boxes, thres): res = [] for cls in range(class_num): cls_bboxes = all_boxes[cls] sorted_boxes = sorted(cls_bboxes, key=lambda d: d[5])[::-1] p = dict() for i in range(len(sorted_boxes)): if i in p: continue truth = sorted_boxes[i] for j in range(i + 1, len(sorted_boxes)): if j in p: continue box = sorted_boxes[j] iou = self.cal_iou(box, truth) if iou >= thres: p[j] = 1 for i in range(len(sorted_boxes)): if i not in p: res.append(sorted_boxes[i]) return res def decode_bbox(self, conv_output, anchors, img_w, img_h, x_scale, y_scale, shift_x_ratio, shift_y_ratio): def _sigmoid(x): s = 1 / (1 + np.exp(-x)) return s h, w, _ = conv_output.shape pred = conv_output.reshape((h * w, 3, 5 + class_num)) pred[..., 4:] = _sigmoid(pred[..., 4:]) pred[..., 0] = (_sigmoid(pred[..., 0]) + np.tile(range(w), (3, h)).transpose( (1, 0))) / w pred[..., 1] = (_sigmoid(pred[..., 1]) + np.tile(np.repeat(range(h), w), (3, 1)).transpose( (1, 0))) / h pred[..., 2] = np.exp(pred[..., 2]) * anchors[:, 0:1].transpose( (1, 0)) / w pred[..., 3] = np.exp(pred[..., 3]) * anchors[:, 1:2].transpose( (1, 0)) / h bbox = np.zeros((h * w, 3, 4)) bbox[..., 0] = np.maximum( (pred[..., 0] - pred[..., 2] / 2.0 - shift_x_ratio) * x_scale * img_w, 0) # x_min bbox[..., 1] = np.maximum( (pred[..., 1] - pred[..., 3] / 2.0 - shift_y_ratio) * y_scale * img_h, 0) # y_min bbox[..., 2] = np.minimum( (pred[..., 0] + pred[..., 2] / 2.0 - shift_x_ratio) * x_scale * img_w, img_w) # x_max bbox[..., 3] = np.minimum( (pred[..., 1] + pred[..., 3] / 2.0 - shift_y_ratio) * y_scale * img_h, img_h) # y_max pred[..., :4] = bbox pred = pred.reshape((-1, 5 + class_num)) pred[:, 4] = pred[:, 4] * pred[:, 5:].max(1) pred = pred[pred[:, 4] >= conf_threshold] pred[:, 5] = np.argmax(pred[:, 5:], axis=-1) all_boxes = [[] for ix in range(class_num)] for ix in range(pred.shape[0]): box = [int(pred[ix, iy]) for iy in range(4)] box.append(int(pred[ix, 5])) box.append(pred[ix, 4]) all_boxes[box[4] - 1].append(box) return all_boxes def convert_labels(self, label_list): if isinstance(label_list, np.ndarray): label_list = label_list.tolist() label_names = [labels[int(index)] for index in label_list] return label_names def _analyze_inference_output(self, infer_output, origin_img): result_return = dict() #img_h = origin_img.size[1] #img_w = origin_img.size[0] img_h = origin_img.height img_w = origin_img.width scale = min( float(MODEL_WIDTH) / float(img_w), float(MODEL_HEIGHT) / float(img_h)) new_w = int(img_w * scale) new_h = int(img_h * scale) shift_x_ratio = (MODEL_WIDTH - new_w) / 2.0 / MODEL_WIDTH shift_y_ratio = (MODEL_HEIGHT - new_h) / 2.0 / MODEL_HEIGHT class_num = len(labels) num_channel = 3 * (class_num + 5) x_scale = MODEL_WIDTH / float(new_w) y_scale = MODEL_HEIGHT / float(new_h) all_boxes = [[] for ix in range(class_num)] for ix in range(3): pred = infer_output[2 - ix].reshape( (MODEL_HEIGHT // stride_list[ix], MODEL_WIDTH // stride_list[ix], num_channel)) anchors = anchor_list[ix] boxes = self.decode_bbox(pred, anchors, img_w, img_h, x_scale, y_scale, shift_x_ratio, shift_y_ratio) all_boxes = [all_boxes[iy] + boxes[iy] for iy in range(class_num)] res = self.apply_nms(all_boxes, iou_threshold) if not res: result_return['detection_classes'] = [] result_return['detection_boxes'] = [] result_return['detection_scores'] = [] # return result_return else: new_res = np.array(res) picked_boxes = new_res[:, 0:4] picked_boxes = picked_boxes[:, [1, 0, 3, 2]] picked_classes = self.convert_labels(new_res[:, 4]) picked_score = new_res[:, 5] result_return['detection_classes'] = picked_classes result_return['detection_boxes'] = picked_boxes.tolist() result_return['detection_scores'] = picked_score.tolist() # return result_return detection_result_list = [] for i in range(len(result_return['detection_classes'])): box = result_return['detection_boxes'][i] class_name = result_return['detection_classes'][i] confidence = result_return['detection_scores'][i] detection_item = presenter_datatype.ObjectDetectionResult() detection_item.confidence = confidence detection_item.box.lt.x = int(box[1]) detection_item.box.lt.y = int(box[0]) detection_item.box.rb.x = int(box[3]) detection_item.box.rb.y = int(box[2]) detection_item.result_text = str(class_name) detection_result_list.append(detection_item) return detection_result_list