def forward(self, x):
    """Decode the three detection heads and concatenate the results.

    Args:
        x: Sequence that unpacks into exactly three head outputs. They are
            paired, in order, with ``self.anchors_mask[0]``, ``[1]`` and
            ``[2]``.

    Returns:
        The three ``decode`` results concatenated along dimension 1.
    """
    l_data, m_data, h_data = x
    decoded = []
    for idx, head in enumerate((l_data, m_data, h_data)):
        pred, stride = permute_sigmoid(head, self.input_wh, 3,
                                       self.num_classes)
        mask = self.anchors_mask[idx]
        # The mask is used as an inclusive [first, last] range into the
        # anchor table, exactly like the original slice expression.
        anchors = self.anchors[mask[0]:mask[-1] + 1]
        # detach(): the decoded boxes are not meant to carry gradients.
        decoded.append(
            decode(pred.detach(), self.input_wh, anchors, self.num_classes,
                   stride))
    return torch.cat(decoded, 1)
def forward(self, predictions):
    """Decode location predictions into boxes and return them with scores.

    Args:
        predictions: ``(loc, conf, priors)``, or
            ``(arm_loc, arm_conf, loc, conf, priors)`` when ``self.use_arm``
            is true. ``loc`` is indexed per image along dimension 0;
            ``conf`` must be reshapeable to
            ``[batch, num_priors, num_classes]``.

    Returns:
        ``(self.boxes, self.scores)`` with shapes ``[batch, num_priors, 4]``
        and ``[batch, num_priors, num_classes]``.

    Side effects:
        Sets ``self.num_priors``, ``self.boxes`` and ``self.scores``. With
        ``use_arm`` the entries of ``conf`` whose ARM objectness is
        ``<= 0.01`` are zeroed in place.
    """
    if self.use_arm:
        arm_loc, arm_conf, loc, conf, priors = predictions
        arm_loc_data = arm_loc.data
        arm_conf_data = arm_conf.data
        arm_object_conf = arm_conf_data[:, 1:]
        no_object_index = arm_object_conf <= 0.01  # self.object_score
        conf.data[no_object_index.expand_as(conf.data)] = 0
    else:
        loc, conf, priors = predictions
    loc_data = loc.data
    conf_data = conf.data
    prior_data = priors.data

    num = loc_data.size(0)  # batch size
    self.num_priors = prior_data.size(0)
    # Allocate at the real batch size. The previous in-place ``expand_`` on a
    # batch-1 buffer is not available on current PyTorch.
    self.boxes = torch.zeros(num, self.num_priors, 4)
    self.scores = torch.zeros(num, self.num_priors, self.num_classes)

    if num == 1:
        conf_preds = conf_data.unsqueeze(0)
    else:
        # ``self.num_priors``: the bare name ``num_priors`` was never defined.
        conf_preds = conf_data.view(num, self.num_priors, self.num_classes)

    # Decode predictions into bboxes, one image at a time.
    for i in range(num):
        if self.use_arm:
            # Refine the priors with the ARM offsets first, then decode the
            # final offsets relative to those refined anchors.
            default = decode(arm_loc_data[i], prior_data, self.variance)
            default = center_size(default)
            decoded_boxes = decode(loc_data[i], default, self.variance)
        else:
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        self.boxes[i] = decoded_boxes
        self.scores[i] = conf_preds[i].clone()

    return self.boxes, self.scores
def get_FPS(self, image, test_interval):
    """Measure the average time of one detection pass on ``image``.

    One untimed warm-up pass is executed first; afterwards the network
    forward, decoding, NMS and box rescaling are repeated ``test_interval``
    times and the mean wall-clock duration is returned.

    Args:
        image: Input image, convertible to an ``H x W x C`` float32 array.
        test_interval: Number of timed repetitions (must be non-zero).

    Returns:
        Average seconds per repetition.
    """
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # Factors that map normalised coordinates back to source-image pixels.
    scale = [im_width, im_height] * 2
    scale_for_landmarks = [im_width, im_height] * 5

    if self.letterbox_image:
        image = np.array(
            letterbox_image(image,
                            [self.input_shape[1], self.input_shape[0]]),
            np.float32)
    else:
        self.anchors = Anchors(
            self.cfg, image_size=(im_height, im_width)).get_anchors()

    def infer():
        # Network forward + decode + NMS on the prepared input tensor.
        loc, conf, landms = self.net(image)
        boxes = decode(loc.data.squeeze(0), self.anchors,
                       self.cfg['variance'])
        boxes = boxes.cpu().numpy()
        conf = conf.data.squeeze(0)[:, 1:2].cpu().numpy()
        landms = decode_landm(landms.data.squeeze(0), self.anchors,
                              self.cfg['variance'])
        landms = landms.cpu().numpy()
        stacked = np.concatenate([boxes, conf, landms], -1)
        return non_max_suppression(stacked, self.confidence)

    def rescale(dets):
        # Undo letterboxing (if used) and convert to pixel coordinates.
        if len(dets) > 0:
            if self.letterbox_image:
                dets = retinaface_correct_boxes(
                    dets,
                    np.array([self.input_shape[0], self.input_shape[1]]),
                    np.array([im_height, im_width]))
            dets[:, :4] = dets[:, :4] * scale
            dets[:, 5:] = dets[:, 5:] * scale_for_landmarks
        return dets

    # Warm-up pass (not timed).
    with torch.no_grad():
        image = torch.from_numpy(
            preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()
        boxes_conf_landms = infer()
    boxes_conf_landms = rescale(boxes_conf_landms)

    t1 = time.time()
    for _ in range(test_interval):
        with torch.no_grad():
            boxes_conf_landms = infer()
        boxes_conf_landms = rescale(boxes_conf_landms)
    t2 = time.time()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def forward(self, predictions):
    """Softmax the class scores and decode all boxes of the batch at once.

    Args:
        predictions: ``(loc, conf, priors)``, or
            ``(arm_loc, arm_conf, loc, conf, priors)`` when
            ``self.cfg.MODEL.CASCADE`` is true. ``priors`` is truncated to
            ``loc.size(1)`` rows.

    Returns:
        ``(self.boxes, self.scores)`` with shapes ``[batch, num_priors, 4]``
        and ``[batch, num_priors, num_classes]``.

    Side effects:
        Sets ``self.num_priors``, ``self.boxes`` and ``self.scores``.
    """
    cascade = self.cfg.MODEL.CASCADE
    if cascade:
        arm_loc, arm_conf, loc, conf, priors = predictions
        arm_conf = F.softmax(arm_conf.view(-1, 2), 1)
        conf = F.softmax(conf.view(-1, self.num_classes), 1)
        arm_loc_data = arm_loc.data
        arm_conf_data = arm_conf.data
        arm_object_conf = arm_conf_data[:, 1:]
        # Zero the class scores of anchors the ARM considers background.
        no_object_index = arm_object_conf <= self.object_score
        conf.data[no_object_index.expand_as(conf.data)] = 0
    else:
        loc, conf, priors = predictions
        conf = F.softmax(conf.view(-1, self.num_classes), 1)

    loc_data = loc.data
    conf_data = conf.data
    prior_data = priors[:loc_data.size(1), :]

    num = loc_data.size(0)  # batch size
    self.num_priors = prior_data.size(0)

    # Replicate the priors for every image so the whole batch is decoded in
    # a single call.
    batch_prior = prior_data.view(-1, self.num_priors, 4).expand(
        (num, self.num_priors, 4))
    batch_prior = batch_prior.contiguous().view(-1, 4)

    if cascade:
        default = decode(arm_loc_data.view(-1, 4), batch_prior,
                         self.variance)
        default = center_size(default)
        decoded_boxes = decode(loc_data.view(-1, 4), default,
                               self.variance1)
    else:
        decoded_boxes = decode(loc_data.view(-1, 4), batch_prior,
                               self.variance)

    # The results are assigned directly; no zero-filled placeholder buffers
    # are allocated beforehand because they would be overwritten right here.
    self.scores = conf_data.view(num, self.num_priors, self.num_classes)
    self.boxes = decoded_boxes.view(num, self.num_priors, 4)
    return self.boxes, self.scores
def forward(self, predictions, prior):
    """Decode location predictions into boxes and return them with scores.

    Args:
        predictions: ``(loc, conf)``. ``loc`` is indexed per image along
            dimension 0; ``conf`` must be reshapeable to
            ``[batch, num_priors, num_classes]``.
        prior: Prior boxes; ``prior.size(0)`` is the number of priors.

    Returns:
        ``(self.boxes, self.scores)`` with shapes ``[batch, num_priors, 4]``
        and ``[batch, num_priors, num_classes]``; they are moved to CUDA when
        ``loc`` lives on CUDA.

    Side effects:
        Sets ``self.num_priors``, ``self.boxes`` and ``self.scores``.
    """
    loc, conf = predictions
    loc_data = loc.data
    conf_data = conf.data
    prior_data = prior.data

    num = loc_data.size(0)  # batch size
    self.num_priors = prior_data.size(0)
    # Allocate at the real batch size instead of growing a batch-1 buffer
    # with ``expand_``, which current PyTorch does not provide.
    self.boxes = torch.zeros(num, self.num_priors, 4)
    self.scores = torch.zeros(num, self.num_priors, self.num_classes)
    if loc_data.is_cuda:
        self.boxes = self.boxes.cuda()
        self.scores = self.scores.cuda()

    if num == 1:
        conf_preds = conf_data.unsqueeze(0)
    else:
        conf_preds = conf_data.view(num, self.num_priors, self.num_classes)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        if self.giou:
            # Re-order the corners so that (x1, y1) <= (x2, y2) holds even
            # when the regression produced swapped coordinates.
            p = decoded_boxes
            decoded_boxes = torch.stack([
                torch.min(p[:, 0], p[:, 2]),
                torch.min(p[:, 1], p[:, 3]),
                torch.max(p[:, 0], p[:, 2]),
                torch.max(p[:, 1], p[:, 3]),
            ], 1)
        self.boxes[i] = decoded_boxes
        self.scores[i] = conf_preds[i].clone()

    return self.boxes, self.scores
def box_handle(img, conf, im_height, im_width, scale, loc, landms):
    """Turn raw network outputs into NMS-filtered detections.

    Relies on the module-level ``cfg_mnet``, ``device``,
    ``confidence_threshold`` and ``nms_threshold``.

    Args:
        img: Network input tensor; ``img.shape[3]`` and ``img.shape[2]`` are
            used as the landmark scale factors.
        conf: Classification output; column 1 is used as the score.
        im_height, im_width: Image size used to build the prior boxes.
        scale: Tensor multiplied onto the decoded boxes.
        loc: Box regression output.
        landms: Landmark regression output.

    Returns:
        Array whose rows are ``[box(4), score, landmarks(10)]``.
    """
    variance = cfg_mnet['variance']
    priors = PriorBox(cfg_mnet, image_size=(im_height, im_width)).forward()
    prior_data = priors.to(device).data

    boxes = (decode(loc.data.squeeze(0), prior_data, variance) *
             scale).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landm_scale = torch.Tensor([img.shape[3], img.shape[2]] * 5).to(device)
    landms = decode_landm(landms.data.squeeze(0), prior_data, variance)
    landms = (landms * landm_scale).cpu().numpy()

    # Keep candidates above the threshold, ordered by descending score.
    above = np.where(scores > confidence_threshold)[0]
    picked = above[scores[above].argsort()[::-1]]
    boxes, landms, scores = boxes[picked], landms[picked], scores[picked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    return np.concatenate((dets[keep, :], landms[keep]), axis=1)
def _postprocess(loc, conf, landms, priors, cfg, img,
                 confidence_threshold=0.2, top_k=50, nms_threshold=0.5):
    """Postprocess TensorRT outputs into face boxes and landmarks.

    Args:
        loc: Box regression output with a leading batch axis of size 1.
        conf: Classification output with a leading batch axis of size 1;
            column 1 is used as the face score.
        landms: Landmark regression output with a leading batch axis of
            size 1 (ten values per prior after decoding).
        priors: Prior boxes of the RetinaFace model.
        cfg: Model configuration; ``cfg['image_size']`` and
            ``cfg['variance']`` are read.
        img: Input image; only ``max(img.shape)`` is used.
        confidence_threshold: Minimum score for a candidate to be kept.
        top_k: Maximum number of candidates passed to NMS.
        nms_threshold: Overlap threshold handed to ``py_cpu_nms``.

    Returns:
        ``(facePositions, landmarks)``: lists of integer coordinates after
        NMS, rescaled by ``max(img.shape) / cfg['image_size']``.
    """
    long_side = max(img.shape)
    img_size = cfg['image_size']
    variance = cfg['variance']
    scale = np.ones(4) * img_size
    scale1 = np.ones(10) * img_size

    # Decode boxes and landmarks into network-input pixel coordinates.
    boxes = decode(np.squeeze(loc, axis=0), priors, variance) * scale
    landms = decode_landm(np.squeeze(landms, axis=0), priors,
                          variance) * scale1

    # Ignore low scores.
    scores = np.squeeze(conf, axis=0)[:, 1]
    inds = np.where(scores > confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # Keep top-K before NMS.
    order = scores.argsort()[::-1][:top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # Do NMS.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]

    # Resize from the network input size to the longest image side.
    res = long_side / img_size
    facePositions = (dets[:, :4] * res).astype(int).tolist()
    landmarks = (landms * res).astype(int).tolist()
    return facePositions, landmarks
def forward(self, loc_p, loc_t, prior_data):
    """Compute an IoU-family regression loss, scaled by 5.

    Args:
        loc_p: Predicted locations; decoded against ``prior_data`` when
            ``self.pred_mode == 'Center'``, otherwise used as boxes directly.
        loc_t: Target boxes.
        prior_data: Prior boxes handed to ``decode``.

    Returns:
        ``5 * sum(1 - overlap)``, divided by ``loc_p.shape[0]`` unless
        ``self.size_sum`` is true. ``self.loss`` selects the overlap:
        ``'Iou'``, ``'Giou'``, ``'Diou'``, anything else means CIoU.
    """
    num = loc_p.shape[0]

    decoded_boxes = loc_p
    if self.pred_mode == 'Center':
        decoded_boxes = decode(loc_p, prior_data, self.variances)

    if self.loss == 'Iou':
        overlaps = bbox_overlaps_iou(decoded_boxes, loc_t)
    elif self.loss == 'Giou':
        overlaps = bbox_overlaps_giou(decoded_boxes, loc_t)
    elif self.loss == 'Diou':
        overlaps = bbox_overlaps_diou(decoded_boxes, loc_t)
    else:
        overlaps = bbox_overlaps_ciou(decoded_boxes, loc_t)
    loss = torch.sum(1.0 - overlaps)

    if not self.size_sum:
        loss = loss / num
    return 5 * loss
def detect_image(self, image):
    """Detect faces in ``image`` and draw boxes, scores and landmarks.

    Args:
        image: ``H x W x C`` array; a copy is annotated, the input is left
            untouched.

    Returns:
        ``(annotated_image, pnum)`` where ``pnum`` is the number of
        detections that survived NMS.
    """
    # Draw on a copy of the input.
    old_image = image.copy()
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # Factors that map normalised box / landmark coordinates back to the
    # size of the source image.
    scale = torch.Tensor([im_width, im_height] * 2)
    scale_for_landmarks = torch.Tensor([im_width, im_height] * 5)

    # HWC -> CHW, then add the batch dimension.
    image = torch.from_numpy(
        preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)

    # Build the prior boxes for this image size.
    anchors = Anchors(self.cfg,
                      image_size=(im_height, im_width)).get_anchors()

    with torch.no_grad():
        if self.cuda:
            scale = scale.cuda()
            scale_for_landmarks = scale_for_landmarks.cuda()
            image = image.cuda()
            anchors = anchors.cuda()

        loc, conf, landms = self.net(image)  # forward pass

        boxes = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
        boxes = (boxes * scale).cpu().numpy()

        conf = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

        landms = decode_landm(landms.data.squeeze(0), anchors,
                              self.cfg['variance'])
        landms = (landms * scale_for_landmarks).cpu().numpy()

        boxes_conf_landms = np.concatenate([boxes, conf, landms], -1)
        boxes_conf_landms = non_max_suppression(boxes_conf_landms,
                                                self.confidence)

    # One colour per landmark, in the order the landmarks are stored.
    landmark_colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255),
                       (0, 255, 0), (255, 0, 0)]
    for det in boxes_conf_landms:
        text = "{:.4f}".format(det[4])
        b = [int(v) for v in det]
        cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cv2.putText(old_image, text, (b[0], b[1] + 12),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        # Landmarks occupy columns 5..14 as (x, y) pairs.
        for k, color in enumerate(landmark_colors):
            cv2.circle(old_image, (b[5 + 2 * k], b[6 + 2 * k]), 1, color, 4)

    pnum = len(boxes_conf_landms)
    return old_image, pnum
def forward(self, loc_data, conf_data, prior_data):
    """Decode boxes and run per-class NMS on the CPU.

    Args:
        loc_data: Location predictions, indexed per image along dimension 0.
        conf_data: Class scores, reshapeable to
            ``[batch, num_priors, num_classes]``.
        prior_data: Prior boxes; ``prior_data.size(0)`` is the prior count.

    Returns:
        Tensor of shape ``[batch, num_classes, top_k, 5]`` whose rows are
        ``[score, box(4)]``; unused rows, and all of class 0, stay zero.
    """
    loc_data = loc_data.cpu()
    conf_data = conf_data.cpu()

    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Process every image of the batch.
    for i in range(num):
        # Decode the priors into predicted boxes.
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        conf_scores = conf_preds[i].clone()

        # Class 0 is skipped; NMS is applied to each remaining class.
        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            kept = ids[:count]
            output[i, cl, :count] = torch.cat(
                (scores[kept].unsqueeze(1), boxes[kept]), 1)

    # NOTE: the former trailing ``flt[mask].fill_(0)`` step was removed.
    # Boolean-mask indexing returns a copy, so that statement never modified
    # ``output``; the returned values are unchanged.
    return output
def nms(class_pred, box_pred, coef_pred, proto_out, anchors, cfg):
    """Filter predictions by class score and run the configured NMS.

    Args:
        class_pred, box_pred, coef_pred, proto_out: Network outputs; all
            singleton dimensions are squeezed away before use.
        anchors: Anchor boxes, indexed with the same keep-mask as the boxes.
        cfg: Configuration providing ``nms_score_thre`` and
            ``traditional_nms``.

    Returns:
        ``(class_ids, class_scores, boxes, coefficients, proto)``, or five
        ``None`` values when no box exceeds the score threshold.
    """
    scores = class_pred.squeeze()   # e.g. [19248, 81]
    box_p = box_pred.squeeze()      # e.g. [19248, 4]
    coef_p = coef_pred.squeeze()    # e.g. [19248, 32]
    proto_p = proto_out.squeeze()   # e.g. [138, 138, 32]

    # Class-major layout without the background row: [80, 19248].
    scores = scores.transpose(1, 0).contiguous()[1:, :]

    # Best foreground score of every predicted box.
    best_score, _ = torch.max(scores, dim=0)

    keep = best_score > cfg.nms_score_thre
    class_thre = scores[:, keep]
    box_thre = decode(box_p[keep, :], anchors[keep, :])
    coef_thre = coef_p[keep, :]

    if class_thre.shape[1] == 0:
        return None, None, None, None, None

    suppress = traditional_nms if cfg.traditional_nms else fast_nms
    box_thre, coef_thre, class_ids, class_thre = suppress(
        box_thre, coef_thre, class_thre, cfg)
    return class_ids, class_thre, box_thre, coef_thre, proto_p
def forward(self, predictions, prior, scale):
    """Decode location predictions into boxes and return them with scores.

    Args:
        predictions: ``(loc, conf, bin_conf)``. ``bin_conf`` is accepted for
            interface compatibility but does not influence the returned
            values.
        prior: Prior boxes; ``prior.size(0)`` is the number of priors.
        scale: Unused.

    Returns:
        ``(self.boxes, self.scores)`` with shapes ``[batch, num_priors, 4]``
        and ``[batch, num_priors, num_classes]``; on CUDA when ``loc`` is.

    Side effects:
        Sets ``self.num_priors``, ``self.boxes``, ``self.scores`` and a
        zero-filled ``self.bin_scores`` of shape ``[batch, num_priors, 2]``.
    """
    loc, conf, bin_conf = predictions
    loc_data = loc.data
    conf_data = conf.data
    prior_data = prior.data

    num = loc_data.size(0)  # batch size
    self.num_priors = prior_data.size(0)
    # Allocate at the real batch size instead of growing batch-1 buffers
    # with ``expand_``, which current PyTorch does not provide.
    self.boxes = torch.zeros(num, self.num_priors, 4)
    self.bin_scores = torch.zeros(num, self.num_priors, 2)
    self.scores = torch.zeros(num, self.num_priors, self.num_classes)
    if loc_data.is_cuda:
        self.boxes = self.boxes.cuda()
        self.bin_scores = self.bin_scores.cuda()
        self.scores = self.scores.cuda()

    if num == 1:
        conf_preds = conf_data.unsqueeze(0)
    else:
        # ``self.num_priors``: the bare name ``num_priors`` was never defined.
        conf_preds = conf_data.view(num, self.num_priors, self.num_classes)

    # Decode predictions into bboxes.
    # NOTE(review): the earlier "soft" score that mixed ``bin_conf`` into the
    # class scores was computed but never stored or returned, so it is no
    # longer evaluated. Re-introduce it here if those scores are wanted.
    for i in range(num):
        self.boxes[i] = decode(loc_data[i], prior_data, self.variance)
        self.scores[i] = conf_preds[i].clone()

    return self.boxes, self.scores
def forward(self, predictions, prior):
    """Decode location predictions into boxes and return them with scores.

    Args:
        predictions: ``(loc, conf)``. ``loc`` is indexed per image along
            dimension 0; ``conf`` is viewed as
            ``[batch, num_priors, num_classes]``.
        prior: Prior boxes; ``prior.size(0)`` is the number of priors.

    Returns:
        ``(self.boxes, self.scores)`` with shapes ``[batch, num_priors, 4]``
        and ``[batch, num_priors, num_classes]``; on CUDA when ``loc`` is.
    """
    loc, conf = predictions
    loc_data, conf_data, prior_data = loc.data, conf.data, prior.data

    batch = loc_data.size(0)
    self.num_priors = prior_data.size(0)

    box_buf = torch.zeros(batch, self.num_priors, 4)
    score_buf = torch.zeros(batch, self.num_priors, self.num_classes)
    if loc_data.is_cuda:
        box_buf = box_buf.cuda()
        score_buf = score_buf.cuda()
    self.boxes, self.scores = box_buf, score_buf

    per_image_conf = conf_data.view(batch, self.num_priors,
                                    self.num_classes)

    # Fill the result buffers one image at a time.
    for idx in range(batch):
        self.boxes[idx] = decode(loc_data[idx], prior_data, self.variance)
        self.scores[idx] = per_image_conf[idx].clone()

    return self.boxes, self.scores
def detect(self, img):
    """Run the detector on one image and return confident boxes/landmarks.

    Args:
        img: ``H x W x C`` array. It is not modified.

    Returns:
        ``(boxes, landms)``: CPU tensors holding the decoded, rescaled boxes
        and landmarks whose score exceeds ``self.confidence_threshold``. No
        NMS is applied here.
    """
    device = self.device
    prior_data, scale, scale1 = self.decode_params(*img.shape[:2])

    # REF: test_fddb.py
    # np.array(...) always copies, so the in-place mean subtraction below
    # can never write into the caller's array.
    img = np.array(img, dtype=np.float32)
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device, dtype=torch.float32)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        loc, conf, landms = self.net(img)
    loc = loc.cpu()
    conf = conf.cpu()
    landms = landms.cpu()

    # Decode results
    boxes = decode(loc.squeeze(0), prior_data, self.variance)
    boxes = boxes * scale
    scores = conf.squeeze(0)[:, 1]
    landms = decode_landm(landms.squeeze(0), prior_data, self.variance)
    landms = landms * scale1

    inds = scores > self.confidence_threshold
    boxes = boxes[inds]
    landms = landms[inds]
    return boxes, landms
def do_detect(img_raw, net, device, cfg):
    """Detect faces in ``img_raw`` and return boxes+scores and landmarks.

    Reads the thresholds from the module-level ``args`` object
    (``confidence_threshold``, ``top_k``, ``nms_threshold``, ``keep_top_k``).

    Args:
        img_raw: ``H x W x C`` image array.
        net: Network returning ``(loc, conf, landms)``.
        device: Device the input and the priors are moved to.
        cfg: Model configuration; passed to ``PriorBox`` and read for
            ``cfg['variance']``.

    Returns:
        ``(dets, landms)``: ``dets`` rows are ``[box(4), score]``; ``landms``
        holds the matching landmark rows.
    """
    resize = 1

    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)
    scale = scale.to(device)

    tic = time.time()
    loc, conf, landms = net(img)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - tic))

    priors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
    prior_data = priors.to(device).data

    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = (boxes * scale / resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landms = decode_landm(landms.data.squeeze(0), prior_data,
                          cfg['variance'])
    # (width, height) of the network input, repeated once per landmark.
    scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5).to(device)
    landms = (landms * scale1 / resize).cpu().numpy()

    # Drop low scores, then keep the top-K candidates by descending score.
    above = np.where(scores > args.confidence_threshold)[0]
    picked = above[scores[above].argsort()[::-1][:args.top_k]]
    boxes, landms, scores = boxes[picked], landms[picked], scores[picked]

    # NMS on [box, score] rows.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)

    # Cap the number of results after NMS.
    dets = dets[keep, :][:args.keep_top_k, :]
    landms = landms[keep][:args.keep_top_k, :]
    return dets, landms
def forward(self, loc_data, conf_data, prior_data):
    """Decode boxes and run per-class NMS on the CPU.

    Args:
        loc_data: Location predictions, indexed per image along dimension 0.
        conf_data: Class scores, viewed as
            ``[batch, num_priors, num_classes]``.
        prior_data: Prior boxes; ``prior_data.size(0)`` is the prior count.

    Returns:
        Tensor of shape ``[batch, num_classes, top_k, 5]`` whose rows are
        ``[score, box(4)]``; unused rows, and all of class 0, stay zero.
    """
    # Everything below runs on the CPU.
    loc_data = loc_data.cpu()
    conf_data = conf_data.cpu()

    batch_size = loc_data.size(0)
    prior_count = prior_data.size(0)
    output = torch.zeros(batch_size, self.num_classes, self.top_k, 5)

    # Reshape the class scores to [batch, num_classes, num_priors].
    conf_preds = conf_data.view(batch_size, prior_count,
                                self.num_classes).transpose(2, 1)

    # One iteration per image (a single one for ordinary prediction).
    for img_idx in range(batch_size):
        # Decode the priors into predicted boxes: [num_priors, 4].
        decoded_boxes = decode(loc_data[img_idx], prior_data, self.variance)
        class_scores = conf_preds[img_idx].clone()

        # Class 0 is skipped.
        for cls in range(1, self.num_classes):
            # Keep only the scores above the confidence threshold.
            above = class_scores[cls].gt(self.conf_thresh)
            scores = class_scores[cls][above]
            if scores.size(0) == 0:
                continue

            # Select the boxes that belong to those scores.
            box_mask = above.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[box_mask].view(-1, 4)

            # Non-maximum suppression on the surviving boxes.
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            kept = ids[:count]
            output[img_idx, cls, :count] = torch.cat(
                (scores[kept].unsqueeze(1), boxes[kept]), 1)

    return output
def forward(self, preds, priors, targets, pos):
    """Compute the summed GIoU / DIoU / CIoU loss over the positive priors.

    Args:
        preds: Predicted offsets, reshaped to ``[-1, 4]``.
        priors: Prior boxes handed to ``decode``.
        targets: Encoded target offsets, reshaped to ``[-1, 4]``.
        pos: Index/mask selecting the positive rows after decoding.

    Returns:
        Scalar tensor ``sum(1 - metric)`` for ``self.loss_type`` in
        ``{"giou", "diou", "ciou"}``; ``None`` for any other value.
    """
    preds = preds.view(-1, 4)
    targets = targets.view(-1, 4)
    boxes_p = decode(preds, priors, self.variance) * self.scale
    boxes_t = decode(targets, priors, self.variance) * self.scale
    boxes_p = boxes_p[pos]
    boxes_t = boxes_t[pos]

    b1_x1, b1_y1, b1_x2, b1_y2 = (boxes_p[:, 0], boxes_p[:, 1],
                                  boxes_p[:, 2], boxes_p[:, 3])
    # All four target coordinates must come from boxes_t. Previously
    # y1/x2/y2 were taken from boxes_p, so the loss compared the prediction
    # largely against itself.
    b2_x1, b2_y1, b2_x2, b2_y2 = (boxes_t[:, 0], boxes_t[:, 1],
                                  boxes_t[:, 2], boxes_t[:, 3])

    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
    union = (w1 * h1 + 1e-16) + w2 * h2 - inter
    iou = inter / union

    # Width/height of the smallest box enclosing both boxes.
    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)

    if self.loss_type == "giou":
        c_area = cw * ch + 1e-16
        giou = iou - (c_area - union) / c_area
        return (1 - giou).sum()

    # Squared enclosing-box diagonal and squared centre distance.
    c2 = cw ** 2 + ch ** 2 + 1e-16
    rho2 = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2)) ** 2 / 4 + \
           ((b2_y1 + b2_y2) - (b1_y1 + b1_y2)) ** 2 / 4

    if self.loss_type == "diou":
        diou = iou - rho2 / c2
        return (1 - diou).sum()

    if self.loss_type == "ciou":
        v = (4 / math.pi ** 2) * torch.pow(
            torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
        with torch.no_grad():
            # The epsilon avoids 0/0 when the boxes coincide (iou == 1 and
            # v == 0).
            alpha = v / (1 - iou + v + 1e-16)
        ciou = iou - (rho2 / c2 + v * alpha)
        return (1 - ciou).sum()
def facebox_detect(self, img_raw):
    """Detect faces and their key-point coordinates in ``img_raw``.

    Args:
        img_raw: ``H x W x C`` image array.

    Returns:
        Array whose rows are ``[box(4), score, coords(...)]`` for the
        detections that survive score filtering, NMS, the ``keep_top_k`` cap
        and the removal of rows containing a negative box/score value.
    """
    cfg = self.cfg

    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])  # w, h, w, h
    scale_coords = torch.Tensor(np.tile([im_width, im_height], 5))
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
    img = img.to(self.device)
    scale = scale.to(self.device)
    scale_coords = scale_coords.to(self.device)

    loc, conf, coords = self.model(img)  # forward pass
    print("bbbb", loc.shape, conf.shape, coords.shape)

    priors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
    prior_data = priors.to(self.device).data

    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    coords = decode_f(coords, cfg['variance'])
    boxes = boxes * scale
    coords = coords * scale_coords
    # Go through .data: coords may still be attached to the autograd graph
    # and could not be converted to numpy directly.
    coords = coords.data.squeeze(0).cpu().numpy()
    boxes = boxes.cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # Drop low scores, then keep the top-K candidates by descending score.
    above = np.where(scores > cfg['confidence_threshold'])[0]
    picked = above[scores[above].argsort()[::-1][:cfg['top_k']]]
    boxes, scores, coords = boxes[picked], scores[picked], coords[picked]

    # NMS on [box, score] rows; coords follow the same selection.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = nms(dets, cfg['nms_threshold'], False)
    dets = dets[keep, :]
    coords = coords[keep, :]

    # Cap the number of results after NMS.
    boxes_score = dets[:cfg['keep_top_k'], :]
    coords = coords[:cfg['keep_top_k'], :]

    # Remove detections that contain any negative value.
    po_ng = np.array([np.any(box < 0) for box in boxes_score])
    valid = np.where(po_ng == False)  # noqa: E712 - element-wise comparison
    boxes_score = boxes_score[valid]
    coords = coords[valid]

    boxes_score_coords = np.hstack((boxes_score, coords))
    return boxes_score_coords
def forward(self, x):
    """Decode three detection heads and fuse the 3 anchors of every cell.

    The decoded predictions are viewed as
    ``[-1, total_cells, 3, 5 + num_classes]``. Within each cell the anchors
    whose channel-4 value lies within 0.1 of the cell maximum are averaged,
    weighted by how far they exceed ``max - 0.1``; channel 4 of the result
    is then set to that maximum.

    Args:
        x: Sequence that unpacks into three 4-D head outputs.

    Returns:
        Tensor of shape ``[-1, total_cells, 5 + num_classes]``.
    """
    l_data, m_data, h_data = x
    heads = (l_data, m_data, h_data)

    # Total number of grid cells over the three heads (width * height each).
    total_cells = sum(t.size(3) * t.size(2) for t in heads)

    activated = [
        permute_sigmoid(t, self.input_wh, 3, self.num_classes) for t in heads
    ]
    anchor_sets = [
        self.anchors[mask[0]:mask[-1] + 1]
        for mask in (self.anchors_mask[0], self.anchors_mask[1],
                     self.anchors_mask[2])
    ]
    decoded = [
        decode(pred.detach(), self.input_wh, anchors, self.num_classes,
               stride)
        for (pred, stride), anchors in zip(activated, anchor_sets)
    ]

    merged = torch.cat(tuple(decoded), 1).view(-1, total_cells, 3,
                                               5 + self.num_classes)
    channel4 = merged[..., 4:5]

    # Per-cell maximum over the three anchors, lowered by the 0.1 margin.
    max_conf, _ = torch.max(channel4, dim=2, keepdim=True)
    max_conf -= 0.1

    # Anchors below (max - 0.1) get weight 0; the rest are normalised so the
    # weights of a cell sum to 1.
    weights = nn.functional.relu(channel4 - max_conf)
    weights = weights / torch.sum(weights, dim=2, keepdim=True)

    fused = torch.sum(merged * weights, dim=2)
    # Restore the un-lowered maximum as the fused channel-4 value.
    fused[:, :, 4] = max_conf[:, :, 0, 0] + 0.1
    return fused
def forward(self, loc_data, conf_data, prior_data):
    """Decode boxes and run per-class (soft-)NMS into ``self.output``.

    Args:
        loc_data: Location predictions, indexed per image along dimension 0.
        conf_data: Class scores; 2-D ``[num_priors, num_classes]`` for a
            batch of one, otherwise reshapeable to
            ``[batch, num_priors, num_classes]``.
        prior_data: Prior boxes; ``prior_data.size(0)`` is the prior count.

    Returns:
        ``self.output`` of shape ``[batch, num_classes, top_k, 5]`` whose
        rows are ``[score, box(4)]``; unused rows stay zero.
    """
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)

    # Reuse the result buffer when its batch size matches. Otherwise allocate
    # a fresh one; ``expand_`` is not available on current PyTorch.
    if self.output.size(0) == num:
        self.output.zero_()
    else:
        self.output = self.output.new_zeros(num, self.num_classes,
                                            self.top_k, 5)

    if num == 1:
        # -> [1, num_classes, num_priors]
        conf_preds = conf_data.t().contiguous().unsqueeze(0)
    else:
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        conf_scores = conf_preds[i].clone()

        # For each class (class 0 skipped), perform NMS.
        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            # An empty selection is a 1-D tensor with zero elements, so test
            # the element count; ``dim() == 0`` never fires for it.
            if scores.numel() == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)

            if self.soft_nms == -1:
                # Hard NMS: indices of the highest-scoring,
                # non-overlapping boxes.
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                kept = ids[:count]
                self.output[i, cl, :count] = torch.cat(
                    (scores[kept].unsqueeze(1), boxes[kept]), 1)
            else:
                count = min(boxes.size(0), self.top_k)
                new_scores, new_boxes = soft_nms(boxes, scores,
                                                 self.nms_thresh, self.top_k,
                                                 type=self.soft_nms)
                self.output[i, cl, :count] = torch.cat(
                    (new_scores.unsqueeze(1), new_boxes), 1)

    return self.output
def forward(self, predict, priors, target, variance=None):
    """Compute the GIoU loss between decoded predictions and targets.

    Args:
        predict: Predicted offsets; decoded against ``priors``.
        priors: Prior boxes; must have the same shape as ``predict``.
        target: Target boxes; must have the same shape as ``predict``.
        variance: Variances handed to ``decode``. Defaults to
            ``[0.1, 0.2]``.

    Returns:
        ``mean(1 - GIoU)`` when ``self.size_average`` is true, otherwise
        ``sum(1 - GIoU)``.

    Raises:
        AssertionError: If the three inputs do not share one shape.
    """
    if variance is None:
        # Build a fresh list per call rather than sharing one mutable
        # default object between all calls.
        variance = [0.1, 0.2]
    assert priors.shape == predict.shape == target.shape, "GIoU loss ERROR!"
    p = decode(predict, priors, variance)
    loss = 1 - self._GIoU(p, target)
    return loss.mean() if self.size_average else loss.sum()
def forward(self, predictions, prior):
    """Decode boxes and return them with class and objectness scores.

    Args:
        predictions: ``(loc, conf, obj)``. ``obj`` is indexed as
            ``obj[:, :, 1]``, i.e. it is 3-D with the objectness in
            channel 1 of the last axis.
        prior: Prior boxes; ``prior.size(0)`` is the number of priors.

    Returns:
        ``(self.boxes, self.scores, self.obj)`` with shapes
        ``[batch, num_priors, 4]``, ``[batch, num_priors, num_classes]`` and
        ``[batch, num_priors, 2]``; on CUDA when ``loc`` is.

    Side effects:
        Class scores of priors whose objectness is below ``self.obj_thresh``
        are zeroed in place in ``conf``. Sets ``self.num_priors``,
        ``self.boxes``, ``self.scores`` and ``self.obj``.
    """
    loc, conf, obj = predictions
    loc_data = loc.data
    conf_data = conf.data
    obj_data = obj.data
    prior_data = prior.data

    no_obj_index = obj_data[:, :, 1] < self.obj_thresh
    conf_data[no_obj_index.unsqueeze(2).expand_as(conf_data)] = 0

    num = loc_data.size(0)  # batch size
    self.num_priors = prior_data.size(0)
    # Allocate at the real batch size instead of growing batch-1 buffers
    # with ``expand_``, which current PyTorch does not provide.
    self.boxes = torch.zeros(num, self.num_priors, 4)
    self.scores = torch.zeros(num, self.num_priors, self.num_classes)
    self.obj = torch.zeros(num, self.num_priors, 2)
    if loc_data.is_cuda:
        self.boxes = self.boxes.cuda()
        self.scores = self.scores.cuda()
        self.obj = self.obj.cuda()

    if num == 1:
        conf_preds = conf_data.unsqueeze(0)
        obj_preds = obj_data.unsqueeze(0)
    else:
        # ``self.num_priors`` / ``num``: the bare names ``num_priors`` and
        # ``nu`` used here before were never defined.
        conf_preds = conf_data.view(num, self.num_priors, self.num_classes)
        obj_preds = obj_data.view(num, self.num_priors, 2)

    # Decode predictions into bboxes.
    for i in range(num):
        self.boxes[i] = decode(loc_data[i], prior_data, self.variance)
        self.scores[i] = conf_preds[i].clone()
        self.obj[i] = obj_preds[i].clone()

    return self.boxes, self.scores, self.obj
def pipeline(net, frame, args, device, resize, cfg):
    """Detect faces in ``frame`` and return the frame with detections drawn.

    Args:
        net: Network returning ``(loc, conf, landms)``.
        frame: ``H x W x C`` image array.
        args: Options object providing ``confidence_threshold``, ``top_k``,
            ``nms_threshold``, ``keep_top_k`` and ``vis_thres``.
        device: Device the input and the priors are moved to.
        resize: Divisor applied to the decoded boxes and landmarks.
        cfg: Model configuration; passed to ``PriorBox`` and read for
            ``cfg['variance']``.

    Returns:
        The result of ``draw`` for the frame and its detections.
    """
    img = np.float32(frame)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass

    priors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
    prior_data = priors.to(device).data

    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = (boxes * scale / resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landms = decode_landm(landms.data.squeeze(0), prior_data,
                          cfg['variance'])
    # (width, height) of the network input, repeated once per landmark.
    scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5).to(device)
    landms = (landms * scale1 / resize).cpu().numpy()

    # Drop low scores, then keep the top-K candidates by descending score.
    above = np.where(scores > args.confidence_threshold)[0]
    picked = above[scores[above].argsort()[::-1][:args.top_k]]
    boxes, landms, scores = boxes[picked], landms[picked], scores[picked]

    # NMS on [box, score] rows.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)

    # Cap the number of results after NMS and append the landmarks.
    dets = dets[keep, :][:args.keep_top_k, :]
    landms = landms[keep][:args.keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    frame = draw(frame, dets, args.vis_thres, draw_box=True, draw_text=True,
                 draw_landmarks=True)
    return frame
def process_face_data(cfg, im, im_height, im_width, loc, scale, conf, landms,
                      resize, top_k=5000, nms_threshold=0.4, keep_top_k=750,
                      confidence_threshold=0.6):
    """Post-process raw detector outputs into a list of scored boxes.

    Args:
        cfg: detector configuration; ``cfg['variance']`` is passed to the
            decoders and ``cfg`` itself to ``PriorBox``.
        im: 4-D image tensor; ``im.shape[3]`` / ``im.shape[2]`` scale the
            landmark x / y coordinates.
        im_height, im_width: image size handed to ``PriorBox``.
        loc, conf, landms: network outputs for a single image.
        scale: multiplier applied to the decoded boxes.
        resize: divisor applied to the scaled boxes and landmarks.
        top_k: maximum number of candidates kept before NMS.
        nms_threshold: threshold forwarded to ``py_cpu_nms``.
        keep_top_k: maximum number of detections kept after NMS.
        confidence_threshold: candidates scoring at or below this value are
            discarded (defaults to the previously hard-coded 0.6).

    Returns:
        A list of ``[x1, y1, x2, y2, score]`` rows.
    """
    # Work on whatever device the predictions live on instead of forcing
    # the default CUDA device.
    device = loc.device

    priors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
    priors_data = priors.to(device).data

    boxes = decode(loc.data.squeeze(0), priors_data, cfg['variance'])
    boxes = (boxes * scale / resize).cpu().numpy()
    scores = conf.squeeze(0).cpu().detach().numpy()[:, 1]

    landms = decode_landm(landms.data.squeeze(0), priors_data,
                          cfg['variance'])
    scale_landm = torch.tensor([im.shape[3], im.shape[2]] * 5,
                               dtype=torch.float32, device=device)
    landms = (landms * scale_landm / resize).cpu().numpy()

    # Ignore low scores.  Landmarks must be filtered with the same indices
    # as boxes and scores; previously they were not, which left the
    # landmark rows misaligned with the boxes from this point on.
    inds = np.where(scores > confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # Keep top-K before NMS.
    order = np.argsort(-scores)[:top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # Non-maximum suppression on (x1, y1, x2, y2, score) rows.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(float, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]

    # Keep top-K after NMS.
    dets = dets[:keep_top_k, :]
    landms = landms[:keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    # Only the box and score columns are returned; landmark columns are
    # dropped here.
    return dets[:, :5].tolist()
def forward(self, loc_data, ground_data, prior_data):
    """Return the summed IoG between ground-truth and decoded boxes.

    ``loc_data`` is decoded against ``prior_data`` with ``self.variance``;
    the result and ``ground_data`` are passed to ``IoG`` and all returned
    values are summed into a single loss.
    """
    predicted_boxes = decode(loc_data, prior_data, self.variance)
    overlap = IoG(ground_data, predicted_boxes)
    # The loss is the plain sum of the IoG values; the smoothed alternative
    # sum(-log(1 - IoG + 1e-10)) is intentionally not used here.
    return torch.sum(overlap)
def predict(self, img_name):
    """Run the detector on an image file and return the kept detections.

    Args:
        img_name: path of the image, read with ``cv2.imread``.

    Returns:
        ``float32`` array of ``[x1, y1, x2, y2, score]`` rows after score
        filtering, NMS and the ``self.keep_top_k`` cap.

    Raises:
        ValueError: if the image cannot be read.
    """
    raw = cv2.imread(img_name, cv2.IMREAD_COLOR)
    if raw is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an obscure unpacking error further down.
        raise ValueError('could not read image: {!r}'.format(img_name))
    img = np.float32(raw)

    # Fixed at 1, so the resize branch is currently inactive; it is kept so
    # the box rescaling below stays consistent if the factor is changed.
    resize = 1
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)

    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])

    # Per-channel offsets are subtracted in place before the HWC -> CHW
    # transpose (presumably the training mean -- confirm).
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(self.device)
    scale = scale.to(self.device)

    _t = {'forward_pass': Timer(), 'misc': Timer()}

    _t['forward_pass'].tic()
    loc, conf = self.net(img)  # forward pass
    _t['forward_pass'].toc()

    _t['misc'].tic()
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(self.device)
    prior_data = priors.data

    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # Ignore low scores.
    inds = np.where(scores > self.confidence_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # Keep top-K before NMS.
    order = scores.argsort()[::-1][:self.top_k]
    boxes = boxes[order]
    scores = scores[order]

    # Non-maximum suppression on (x1, y1, x2, y2, score) rows.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = nms(dets, self.nms_threshold, force_cpu=self.cpu)
    dets = dets[keep, :]

    # Keep top-K after NMS.
    dets = dets[:self.keep_top_k, :]
    _t['misc'].toc()
    return dets
def detect_image(self, img) -> List[FaceDetection]:
    """Detect faces in a single image.

    Args:
        img: H x W x C image array.

    Returns:
        One ``FaceDetection`` per detection whose score is not below
        ``args.vis_thres``, built from the integer-truncated box corners,
        a constant ``0`` and the score.
    """
    print(np.shape(img))
    tic = time.time()

    img = np.float32(img)
    im_height, im_width, _ = img.shape
    box_scale = torch.Tensor([im_width, im_height, im_width, im_height])

    # Per-channel offsets are subtracted in place before the HWC -> CHW
    # transpose (presumably the training mean -- confirm).
    img -= (104, 117, 123)
    tensor = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
    tensor = tensor.to(self.device)
    box_scale = box_scale.to(self.device)

    loc, conf, landms = self.net(tensor)  # forward pass

    priors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward()
    prior_data = priors.to(self.device).data

    boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
    boxes = (boxes * box_scale / self.resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Drop low-confidence candidates.
    confident = np.where(scores > args.confidence_threshold)[0]
    boxes, scores = boxes[confident], scores[confident]

    # Keep the top-K highest scores before NMS.
    ranking = scores.argsort()[::-1][:args.top_k]
    boxes, scores = boxes[ranking], scores[ranking]

    # Non-maximum suppression, then cap the number of detections.
    dets = np.hstack((boxes, scores[:, None])).astype(np.float32, copy=False)
    survivors = py_cpu_nms(dets, args.nms_threshold)
    dets = dets[survivors, :][:args.keep_top_k, :]

    # Convert the remaining rows into FaceDetection objects.
    box_list = []
    for det in dets:
        score = det[4]
        if not (score < args.vis_thres):
            ints = [int(value) for value in det]
            box_list.append(
                FaceDetection(ints[0], ints[1], ints[2], ints[3], 0, score))

    print('net forward time: {:.4f}'.format(time.time() - tic))
    return box_list
def GetFacialPoints(img_raw):
    """Detect faces and landmarks in ``img_raw``.

    Uses the module-level ``net``, ``device``, ``cfg`` and ``config``
    objects.

    Args:
        img_raw: H x W x C image array.

    Returns:
        Array whose rows are ``[x1, y1, x2, y2, score]`` followed by the
        decoded landmark coordinates, after score filtering, NMS and the
        ``config.keep_top_k`` cap.
    """
    img = np.float32(img_raw)
    height, width, _ = img_raw.shape

    # Multipliers for decoded boxes (x, y, x, y) and for the five landmark
    # (x, y) pairs.
    box_scale = torch.Tensor([width, height, width, height])
    landm_scale = torch.Tensor([width, height] * 5)

    # Per-channel offsets are subtracted in place before the HWC -> CHW
    # transpose (presumably the training mean -- confirm).
    img -= (104, 117, 123)
    tensor = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)
    box_scale = box_scale.to(device)

    loc, conf, landms = net(tensor)  # forward pass

    priors = PriorBox(cfg, image_size=(height, width)).forward()
    prior_data = priors.to(device).data

    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = (boxes * box_scale / config.resize).cpu().detach().numpy()
    scores = conf.squeeze(0).data.cpu().detach().numpy()[:, 1]

    landms = decode_landm(landms.data.squeeze(0), prior_data,
                          cfg['variance'])
    landms = landms * landm_scale.to(device) / config.resize
    landms = landms.cpu().detach().numpy()

    # Drop low-confidence candidates.
    confident = np.where(scores > config.confidence_threshold)[0]
    boxes, landms, scores = boxes[confident], landms[confident], scores[confident]

    # Keep the top-K highest scores before NMS.
    ranking = scores.argsort()[::-1][:config.top_k]
    boxes, landms, scores = boxes[ranking], landms[ranking], scores[ranking]

    # Non-maximum suppression on (x1, y1, x2, y2, score) rows.
    dets = np.hstack((boxes, scores[:, None])).astype(np.float32, copy=False)
    survivors = py_cpu_nms(dets, config.nms_threshold)
    dets = dets[survivors, :]
    landms = landms[survivors]

    # Cap the number of detections after NMS and append the landmarks.
    dets = dets[:config.keep_top_k, :]
    landms = landms[:config.keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    torch.cuda.empty_cache()
    return dets
def detection_out(self, input, num_classes, objectness_score, name):
    """Decode two-stage (ARM-refined) box predictions and gate class scores.

    Args:
        input: sequence indexed as ``input[0]`` = loc, ``input[1]`` = conf,
            ``input[2]`` = priors, ``input[3]`` = ARM conf and
            ``input[4]`` = ARM loc.
        num_classes: number of columns the class scores are reshaped to.
        objectness_score: priors whose ARM object score (column 1) is less
            than or equal to this value get all class scores zeroed.
        name: unused in this function.

    Returns:
        ``(boxes, scores)`` with shapes ``[batch, num_priors, 4]`` and
        ``[batch, num_priors, num_classes]``.
    """
    variance = [0.1, 0.2]
    loc, conf = input[0], input[1]
    prior_data = input[2]
    arm_loc, arm_conf = input[4], input[3]

    arm_loc_data = tf.reshape(arm_loc, [arm_loc.shape[0], -1, 4])
    arm_conf_data = tf.reshape(arm_conf, [-1, 2])
    loc_data = tf.reshape(loc, [loc.shape[0], -1, 4])
    conf_data = tf.reshape(conf, [-1, num_classes])
    prior_data = tf.reshape(prior_data, [-1, 4])

    # The batch size must be statically known because it drives a Python
    # loop below.
    num = int(loc_data.shape[0])

    # Zero the class scores of priors the ARM considers background.
    no_object_index = arm_conf_data[:, 1:] <= objectness_score
    expands = tf.tile(no_object_index, [1, num_classes])
    conf_data = tf.where(expands, tf.zeros_like(conf_data), conf_data)

    # Previously ``conf_preds`` was only defined for a batch of one, so any
    # larger batch raised a NameError; a reshape handles every batch size.
    conf_preds = tf.reshape(conf_data, [num, -1, num_classes])

    # Decode each image: first refine the priors with the ARM offsets, then
    # decode the final offsets against those refined boxes.  Results are
    # collected per image instead of being overwritten on every iteration.
    decoded = []
    for i in range(num):
        default = decode(arm_loc_data[i], prior_data, variance)
        default = center_size(default)
        decoded.append(decode(loc_data[i], default, variance))

    boxes = tf.stack(decoded, axis=0)
    scores = conf_preds
    return boxes, scores
def decode_output(image, detection_boxes, detection_scores,
                  detection_landmark, cfg_plate):
    """Turn raw detector outputs into box + landmark detections.

    Args:
        image: 4-D image tensor; its last two dimensions are used as
            ``(height, width)``.
        detection_boxes: box regression output for a single image.
        detection_scores: raw class scores; softmax is applied over the
            last dimension and column 1 is used as the detection score.
        detection_landmark: landmark regression output for a single image.
        cfg_plate: configuration providing ``variance``,
            ``confidence_threshold``, ``top_k``, ``nms_threshold`` and
            ``keep_top_k``; also passed to ``PriorBox``.

    Returns:
        Array whose rows are ``[x1, y1, x2, y2, score]`` followed by the
        decoded landmark coordinates.
    """
    image_h, image_w = image.shape[2:]

    detection_scores = F.softmax(detection_scores, dim=-1)

    # Build the priors on the device of the predictions rather than forcing
    # 'cuda', so the function also works on CPU or a non-default GPU.
    priorbox = PriorBox(cfg_plate, image_size=(image_h, image_w),
                        phase='test')  # height, width
    priors = priorbox.forward()
    priors = priors.to(detection_boxes.device)
    prior_data = priors.data

    boxes = decode(detection_boxes.data.squeeze(0), prior_data,
                   cfg_plate['variance'])
    boxes[:, 0::2] = boxes[:, 0::2] * image_w  # x coordinates
    boxes[:, 1::2] = boxes[:, 1::2] * image_h  # y coordinates
    boxes = boxes.cpu().numpy()

    scores = detection_scores.squeeze(0).data.cpu().numpy()[:, 1]

    landms = decode_landm(detection_landmark.data.squeeze(0), prior_data,
                          cfg_plate['variance'])
    landms[:, 0::2] = landms[:, 0::2] * image_w
    landms[:, 1::2] = landms[:, 1::2] * image_h
    landms = landms.cpu().numpy()

    # Ignore low scores.
    inds = np.where(scores > cfg_plate['confidence_threshold'])[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # Keep top-K before NMS.
    order = scores.argsort()[::-1][:cfg_plate['top_k']]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # Non-maximum suppression on (x1, y1, x2, y2, score) rows.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, cfg_plate['nms_threshold'])
    dets = dets[keep, :]
    landms = landms[keep]

    # Keep top-K after NMS and append the landmarks.
    dets = dets[:cfg_plate['keep_top_k'], :]
    landms = landms[:cfg_plate['keep_top_k'], :]
    dets = np.concatenate((dets, landms), axis=1)
    return dets
def forward(self, predictions, prior):
    """Decode box regressions and collect class scores for a batch.

    Args:
        predictions: 2-tuple ``(loc, conf)``.  ``loc`` is indexed per image
            along dimension 0; ``conf`` must hold ``batch * num_priors *
            num_classes`` values.
        prior: prior boxes; ``prior.size(0)`` is taken as the number of
            priors and the tensor is passed unchanged to ``decode``.

    Returns:
        ``(self.boxes, self.scores)`` with shapes
        ``[batch, num_priors, 4]`` and ``[batch, num_priors, num_classes]``.
        Both are allocated with ``torch.zeros`` defaults (CPU).
    """
    loc, conf = predictions
    loc_data = loc.data
    conf_data = conf.data
    prior_data = prior.data

    num = loc_data.size(0)  # batch size
    self.num_priors = prior_data.size(0)

    # Allocate the outputs at full batch size.  The previous code created
    # size-1 buffers and tried to grow them with ``expand_``, which would
    # alias all batch rows to the same memory, and it referenced the
    # undefined name ``num_priors`` whenever batch size was > 1.
    self.boxes = torch.zeros(num, self.num_priors, 4)
    self.scores = torch.zeros(num, self.num_priors, self.num_classes)

    # One reshape covers both the single-image and the batched case.
    conf_preds = conf_data.reshape(num, self.num_priors, self.num_classes)

    # Decode predictions into bboxes, one image at a time.
    for i in range(num):
        self.boxes[i] = decode(loc_data[i], prior_data, self.variance)

    self.scores.copy_(conf_preds)
    return self.boxes, self.scores