Ejemplo n.º 1
0
def box_handle(img, conf, im_height, im_width, scale, loc, landms):
    """Turn raw network outputs into one array of detections plus landmarks.

    Relies on the module-level names ``cfg_mnet``, ``device``,
    ``confidence_threshold`` and ``nms_threshold``.

    Args:
        img: Network input tensor. ``img.shape[3]`` and ``img.shape[2]`` are
            used as the landmark scale (presumably width and height of an
            NCHW batch -- TODO confirm against the caller).
        conf: Confidence output of the network; after squeezing axis 0,
            column 1 is used as the score.
        im_height: Image height handed to ``PriorBox``.
        im_width: Image width handed to ``PriorBox``.
        scale: Tensor multiplied onto the decoded boxes.
        loc: Box regression output of the network.
        landms: Landmark regression output of the network.

    Returns:
        A float array with one row per detection kept by NMS: the box
        columns, the score, then the scaled landmark columns.
    """
    variance = cfg_mnet['variance']
    anchors = PriorBox(cfg_mnet, image_size=(im_height, im_width)).forward()
    anchors = anchors.to(device).data

    # Boxes and scores, moved to NumPy for the CPU post-processing below.
    box_arr = decode(loc.data.squeeze(0), anchors, variance) * scale
    box_arr = box_arr.cpu().numpy()
    score_arr = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Landmarks: ten columns scaled by the (shape[3], shape[2]) pair
    # repeated five times.
    landm_arr = decode_landm(landms.data.squeeze(0), anchors, variance)
    landm_scale = torch.Tensor([img.shape[3], img.shape[2]] * 5).to(device)
    landm_arr = (landm_arr * landm_scale).cpu().numpy()

    # Keep candidates above the threshold, ordered by descending score.
    candidates = np.where(score_arr > confidence_threshold)[0]
    ranking = candidates[score_arr[candidates].argsort()[::-1]]
    box_arr = box_arr[ranking]
    landm_arr = landm_arr[ranking]
    score_arr = score_arr[ranking]

    # NMS works on rows of (box..., score).
    dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
    dets = dets.astype(np.float32, copy=False)
    survivors = py_cpu_nms(dets, nms_threshold)
    return np.concatenate((dets[survivors, :], landm_arr[survivors]), axis=1)
Ejemplo n.º 2
0
def do_detect(img_raw, net, device, cfg):
    """Run ``net`` on a single image and return NMS-filtered boxes and landmarks.

    Thresholds and top-k limits come from the module-level ``args``. The
    duration of the forward pass is printed to stdout.

    Args:
        img_raw: Image array laid out as (height, width, channels).
        net: Callable returning ``(loc, conf, landms)`` for the input batch.
        device: Device the input, the scales and the priors are moved to.
        cfg: Configuration handed to ``PriorBox``; ``cfg['variance']`` is
            passed to the decoders.

    Returns:
        ``(dets, landms)``: ``dets`` holds the box columns followed by the
        score, ``landms`` the landmark rows for the same detections.
    """
    resize = 1
    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height] * 2)

    # Subtract the per-channel constants in place, then go HWC -> 1xCxHxW.
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)
    scale = scale.to(device)

    tic = time.time()
    loc, conf, landms = net(img)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - tic))

    variance = cfg['variance']
    anchors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
    anchors = anchors.to(device).data

    box_arr = decode(loc.data.squeeze(0), anchors, variance) * scale / resize
    box_arr = box_arr.cpu().numpy()
    score_arr = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landm_arr = decode_landm(landms.data.squeeze(0), anchors, variance)
    landm_scale = torch.Tensor([img.shape[3], img.shape[2]] * 5).to(device)
    landm_arr = (landm_arr * landm_scale / resize).cpu().numpy()

    # Ignore low scores, then keep the top-K best candidates before NMS.
    candidates = np.where(score_arr > args.confidence_threshold)[0]
    best_first = score_arr[candidates].argsort()[::-1][:args.top_k]
    ranking = candidates[best_first]
    box_arr = box_arr[ranking]
    landm_arr = landm_arr[ranking]
    score_arr = score_arr[ranking]

    # NMS on rows of (box..., score).
    dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
    dets = dets.astype(np.float32, copy=False)
    survivors = py_cpu_nms(dets, args.nms_threshold)

    # Cap the number of detections after NMS.
    limit = args.keep_top_k
    return dets[survivors, :][:limit, :], landm_arr[survivors][:limit, :]
def find_faces(frames: Tensor, chunk_size: int, model: torch.nn.Module,
               device: torch.device, conf: Dict[str, Any]) -> List[Tensor]:
    """Detect faces chunk by chunk and return crops of them.

    Args:
        frames: Tensor whose shape unpacks as ``(D, H, W, C)``.
        chunk_size: Number of frames passed to ``model`` per forward call.
        model: Callable returning ``(locations, confidence, landmarks)``;
            the landmarks are discarded.
        device: Device the priors and the scale are moved to.
        conf: Configuration handed to ``PriorBox`` and
            ``postproc_detections_gpu``; ``conf['max_face_num_thresh']`` is
            passed to ``max_num_faces``.

    Returns:
        Non-``None`` results of ``crop_square_torch`` for up to
        ``max_faces`` detections per frame, in frame order.
    """
    D, H, W, C = frames.shape
    # D, H, W, C -> D, C, H, W
    frames_orig = frames.permute(0, 3, 1, 2)
    frames, scale = prepare_imgs(frames)
    prior_box = PriorBox(conf, image_size=(H, W))
    priors = prior_box.forward().to(device)
    scale = scale.to(device)

    detections = []
    for start in range(0, D, chunk_size):
        end = start + chunk_size
        with torch.no_grad():
            locations, confidence, landmarks = model(frames[start:end])
            del landmarks
        det_chunk = postproc_detections_gpu(locations, confidence, priors,
                                            scale, conf)
        detections.extend(det_chunk)
        del locations, confidence
    # Drop references to the large intermediates before cropping.
    del priors, prior_box, scale, frames

    # Per-frame detection counts. A wide integer type is used on purpose:
    # uint8 cannot represent a frame with more than 255 detections.
    num_faces = np.array(list(map(len, detections)), dtype=np.int64)
    max_faces = max_num_faces(num_faces, conf['max_face_num_thresh'])
    faces = []
    for f in range(D):
        for bbox in detections[f][:max_faces]:
            face = crop_square_torch(frames_orig[f], bbox[:4])
            if face is not None:
                faces.append(face)
    del detections, frames_orig
    return faces
Ejemplo n.º 4
0
    def _initialize_priorbox(self, cfg, im_height, im_width):
        """Build the priors for an image size and move them to ``self.device``.

        Args:
            cfg: Configuration handed to ``PriorBox``.
            im_height: Image height for the prior generator.
            im_width: Image width for the prior generator.

        Returns:
            The ``.data`` of the generated priors, on ``self.device``.
        """
        generator = PriorBox(cfg, image_size=(im_height, im_width))
        return generator.forward().to(self.device).data
Ejemplo n.º 5
0
 def facebox_detect(self, img_raw):
     """Detect boxes with coordinates on one image.

     Thresholds and top-k limits are read from ``self.cfg``. The shapes of
     the three network outputs are printed to stdout.

     Args:
         img_raw: Image array laid out as (height, width, channels).

     Returns:
         Array whose rows are the box columns, the score, then the scaled
         ``coords`` columns. Rows with any negative box or score value are
         dropped.
     """
     img = np.float32(img_raw)
     im_height, im_width, _ = img.shape
     scale = torch.Tensor([img.shape[1], img.shape[0]] * 2)    # w, h, w, h
     scale_coords = torch.Tensor(np.tile([img.shape[1], img.shape[0]], 5))

     # Subtract the per-channel constants in place, then go HWC -> 1xCxHxW.
     img -= (104, 117, 123)
     img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
     img = img.to(self.device)
     scale = scale.to(self.device)
     scale_coords = scale_coords.to(self.device)

     loc, conf, coords = self.model(img)  # forward pass
     print("bbbb", loc.shape, conf.shape, coords.shape)

     variance = self.cfg['variance']
     anchors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward()
     anchors = anchors.to(self.device).data

     box_t = decode(loc.data.squeeze(0), anchors, variance)
     coord_t = decode_f(coords, variance)
     box_t = box_t * scale
     coord_t = coord_t * scale_coords
     # ``.data`` detaches before the NumPy conversion.
     coord_arr = coord_t.data.squeeze(0).cpu().numpy()
     box_arr = box_t.cpu().numpy()
     score_arr = conf.data.cpu().numpy()[:, 1]

     # Ignore low scores, then keep the top-K best candidates before NMS.
     candidates = np.where(score_arr > self.cfg['confidence_threshold'])[0]
     best_first = score_arr[candidates].argsort()[::-1][:self.cfg['top_k']]
     ranking = candidates[best_first]
     box_arr = box_arr[ranking]
     score_arr = score_arr[ranking]
     coord_arr = coord_arr[ranking]

     # NMS on rows of (box..., score); coords follow the same selection.
     dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
     dets = dets.astype(np.float32, copy=False)
     survivors = nms(dets, self.cfg['nms_threshold'], False)
     dets = dets[survivors, :]
     coord_arr = coord_arr[survivors, :]

     # Cap the number of detections after NMS.
     boxes_score = dets[:self.cfg['keep_top_k'], :]
     coord_arr = coord_arr[:self.cfg['keep_top_k'], :]

     # Discard rows that contain any negative value.
     has_negative = np.array([np.any(row < 0) for row in boxes_score])
     valid_rows = np.where(np.logical_not(has_negative))
     return np.hstack((boxes_score[valid_rows], coord_arr[valid_rows]))
Ejemplo n.º 6
0
def process_face_data(cfg,
                      im,
                      im_height,
                      im_width,
                      loc,
                      scale,
                      conf,
                      landms,
                      resize,
                      top_k=5000,
                      nms_threshold=0.4,
                      keep_top_k=750,
                      confidence_threshold=0.6):
    """Post-process network outputs into a list of ``[box..., score]`` rows.

    Priors and the landmark scale are moved to CUDA unconditionally.

    Args:
        cfg: Configuration handed to ``PriorBox``; ``cfg['variance']`` is
            passed to the decoders.
        im: Network input tensor; ``im.shape[3]`` and ``im.shape[2]`` are
            used as the landmark scale (presumably width and height of an
            NCHW batch -- TODO confirm against the caller).
        im_height: Image height handed to ``PriorBox``.
        im_width: Image width handed to ``PriorBox``.
        loc: Box regression output of the network.
        scale: Tensor multiplied onto the decoded boxes.
        conf: Confidence output; column 1 is used as the score.
        landms: Landmark regression output of the network.
        resize: Factor the scaled boxes and landmarks are divided by.
        top_k: Maximum number of candidates passed to NMS.
        nms_threshold: Threshold handed to ``py_cpu_nms``.
        keep_top_k: Maximum number of detections kept after NMS.
        confidence_threshold: Candidates scoring at or below this value are
            discarded before NMS.

    Returns:
        List of rows holding the first five detection columns (the box
        columns followed by the score).
    """
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.cuda()
    priors_data = priors.data
    boxes = decode(loc.data.squeeze(0), priors_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).cpu().detach().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), priors_data, cfg['variance'])
    scale_landm = torch.from_numpy(
        np.array([
            im.shape[3], im.shape[2], im.shape[3], im.shape[2], im.shape[3],
            im.shape[2], im.shape[3], im.shape[2], im.shape[3], im.shape[2]
        ]))
    scale_landm = scale_landm.float()
    scale_landm = scale_landm.cuda()
    landms = landms * scale_landm / resize
    landms = landms.cpu().numpy()

    # Ignore low scores. The landmarks must be filtered together with the
    # boxes and scores: ``order`` and ``keep`` below index the filtered
    # arrays, so an unfiltered ``landms`` would pair the wrong rows.
    inds = np.where(scores > confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = np.argsort(-scores)[:top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(float, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:keep_top_k, :]
    landms = landms[:keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    # Only the box and score columns are returned.
    result_data = dets[:, :5].tolist()

    return result_data
Ejemplo n.º 7
0
def pipeline(net, frame, args, device, resize, cfg):
    """Detect on one frame and return the frame with the detections drawn.

    Args:
        net: Callable returning ``(loc, conf, landms)`` for the input batch.
        frame: Image array laid out as (height, width, channels).
        args: Object providing ``confidence_threshold``, ``top_k``,
            ``nms_threshold``, ``keep_top_k`` and ``vis_thres``.
        device: Device the input, the scales and the priors are moved to.
        resize: Factor the scaled boxes and landmarks are divided by.
        cfg: Configuration handed to ``PriorBox``; ``cfg['variance']`` is
            passed to the decoders.

    Returns:
        Whatever ``draw`` returns for the frame and its detections.
    """
    img = np.float32(frame)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height] * 2)

    # Subtract the per-channel constants in place, then go HWC -> 1xCxHxW.
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass

    variance = cfg['variance']
    anchors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
    anchors = anchors.to(device).data

    box_arr = decode(loc.data.squeeze(0), anchors, variance) * scale / resize
    box_arr = box_arr.cpu().numpy()
    score_arr = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landm_arr = decode_landm(landms.data.squeeze(0), anchors, variance)
    landm_scale = torch.Tensor([img.shape[3], img.shape[2]] * 5).to(device)
    landm_arr = (landm_arr * landm_scale / resize).cpu().numpy()

    # Ignore low scores, then keep the top-K best candidates before NMS.
    candidates = np.where(score_arr > args.confidence_threshold)[0]
    best_first = score_arr[candidates].argsort()[::-1][:args.top_k]
    ranking = candidates[best_first]
    box_arr = box_arr[ranking]
    landm_arr = landm_arr[ranking]
    score_arr = score_arr[ranking]

    # NMS on rows of (box..., score).
    dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
    dets = dets.astype(np.float32, copy=False)
    survivors = py_cpu_nms(dets, args.nms_threshold)

    # Cap the detections after NMS and append the landmark columns.
    dets = dets[survivors, :][:args.keep_top_k, :]
    landm_arr = landm_arr[survivors][:args.keep_top_k, :]
    dets = np.concatenate((dets, landm_arr), axis=1)

    return draw(frame, dets, args.vis_thres,
                draw_box=True, draw_text=True, draw_landmarks=True)
Ejemplo n.º 8
0
    def predict(self, img_name):
        """Read an image from disk and return its NMS-filtered detections.

        Uses the module-level ``cfg`` together with the thresholds stored on
        the instance. Two ``Timer`` objects bracket the forward pass and the
        post-processing; their readings are not returned.

        Args:
            img_name: Path handed to ``cv2.imread``.

        Returns:
            Float32 array with rows of box columns followed by the score.
        """
        img = np.float32(cv2.imread(img_name, cv2.IMREAD_COLOR))
        resize = 1
        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([im_width, im_height] * 2)

        # Subtract the per-channel constants in place, then HWC -> 1xCxHxW.
        img -= (104, 117, 123)
        img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        forward_timer = Timer()
        misc_timer = Timer()

        forward_timer.tic()
        loc, conf = self.net(img)  # forward pass
        forward_timer.toc()

        misc_timer.tic()
        anchors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
        anchors = anchors.to(self.device).data
        box_arr = decode(loc.data.squeeze(0), anchors, cfg['variance'])
        box_arr = (box_arr * scale / resize).cpu().numpy()
        score_arr = conf.data.cpu().numpy()[:, 1]

        # Ignore low scores, then keep the top-K best candidates before NMS.
        candidates = np.where(score_arr > self.confidence_threshold)[0]
        best_first = score_arr[candidates].argsort()[::-1][:self.top_k]
        ranking = candidates[best_first]
        box_arr = box_arr[ranking]
        score_arr = score_arr[ranking]

        # NMS on rows of (box..., score).
        dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        survivors = nms(dets, self.nms_threshold, force_cpu=self.cpu)

        # Cap the number of detections after NMS.
        dets = dets[survivors, :][:self.keep_top_k, :]
        misc_timer.toc()

        return dets
Ejemplo n.º 9
0
def GetFacialPoints(img_raw):
    """Detect on one image and return boxes, scores and landmarks together.

    Relies on the module-level names ``net``, ``device``, ``cfg`` and
    ``config``. ``torch.cuda.empty_cache()`` is called before returning.

    Args:
        img_raw: Image array laid out as (height, width, channels).

    Returns:
        Array with one row per detection: the box columns, the score, then
        the landmark columns.
    """
    img = np.float32(img_raw)
    height, width, _ = img_raw.shape
    scale = torch.Tensor([width, height] * 2)

    # Subtract the per-channel constants in place, then go HWC -> 1xCxHxW.
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass

    variance = cfg['variance']
    anchors = PriorBox(cfg, image_size=(height, width)).forward()
    anchors = anchors.to(device).data

    box_arr = decode(loc.data.squeeze(0), anchors, variance)
    box_arr = box_arr * scale / config.resize
    box_arr = box_arr.cpu().detach().numpy()
    score_arr = conf.squeeze(0).data.cpu().detach().numpy()[:, 1]

    landm_arr = decode_landm(landms.data.squeeze(0), anchors, variance)
    landm_scale = torch.Tensor([img.shape[3], img.shape[2]] * 5).to(device)
    landm_arr = landm_arr * landm_scale / config.resize
    landm_arr = landm_arr.cpu().detach().numpy()

    # Ignore low scores, then keep the top-K best candidates before NMS.
    candidates = np.where(score_arr > config.confidence_threshold)[0]
    best_first = score_arr[candidates].argsort()[::-1][:config.top_k]
    ranking = candidates[best_first]
    box_arr = box_arr[ranking]
    landm_arr = landm_arr[ranking]
    score_arr = score_arr[ranking]

    # NMS on rows of (box..., score).
    dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
    dets = dets.astype(np.float32, copy=False)
    survivors = py_cpu_nms(dets, config.nms_threshold)

    # Cap the detections after NMS and append the landmark columns.
    dets = dets[survivors, :][:config.keep_top_k, :]
    landm_arr = landm_arr[survivors][:config.keep_top_k, :]
    dets = np.concatenate((dets, landm_arr), axis=1)

    torch.cuda.empty_cache()
    return dets
Ejemplo n.º 10
0
    def detect_image(self, img) -> List[FaceDetection]:
        """Detect on one image and wrap the results as ``FaceDetection``s.

        Thresholds are read from the module-level ``args``. The input shape
        and the total elapsed time are printed to stdout.

        Args:
            img: Image array laid out as (height, width, channels).

        Returns:
            One ``FaceDetection`` per detection whose score is not below
            ``args.vis_thres``, built from the integer-truncated box values,
            a literal ``0`` and the original score.
        """
        # TODO: add detect logic for single image
        print(np.shape(img))
        tic = time.time()

        img = np.float32(img)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([im_width, im_height] * 2)

        # Subtract the per-channel constants in place, then HWC -> 1xCxHxW.
        img -= (104, 117, 123)
        img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        # The landmark output is not used by this method.
        loc, conf, _ = self.net(img)  # forward pass

        anchors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward()
        anchors = anchors.to(self.device).data
        box_arr = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
        box_arr = (box_arr * scale / self.resize).cpu().numpy()
        score_arr = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # Ignore low scores, then keep the top-K best candidates before NMS.
        candidates = np.where(score_arr > args.confidence_threshold)[0]
        best_first = score_arr[candidates].argsort()[::-1][:args.top_k]
        ranking = candidates[best_first]
        box_arr = box_arr[ranking]
        score_arr = score_arr[ranking]

        # NMS on rows of (box..., score), then cap the result.
        dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        survivors = py_cpu_nms(dets, args.nms_threshold)
        dets = dets[survivors, :][:args.keep_top_k, :]

        box_list = []
        for det in dets:
            score = det[4]
            if score < args.vis_thres:
                continue
            as_int = list(map(int, det))
            box_list.append(
                FaceDetection(as_int[0], as_int[1], as_int[2], as_int[3],
                              0, score))

        print('net forward time: {:.4f}'.format(time.time() - tic))

        return box_list
Ejemplo n.º 11
0
def decode_output(image, detection_boxes, detection_scores, detection_landmark, cfg_plate):
    """Decode raw detector outputs into detections with landmarks.

    Args:
        image: Tensor whose trailing two dimensions are read as
            ``(height, width)``.
        detection_boxes: Box regression output of the network.
        detection_scores: Raw class scores; a softmax over the last axis is
            applied here and column 1 is used as the score.
        detection_landmark: Landmark regression output of the network.
        cfg_plate: Configuration handed to ``PriorBox``; also provides
            ``'variance'``, ``'confidence_threshold'``, ``'top_k'``,
            ``'nms_threshold'`` and ``'keep_top_k'``.

    Returns:
        Array with one row per detection: the box columns, the score, then
        the landmark columns.
    """
    image_h, image_w = image.shape[2:]
    detection_scores = F.softmax(detection_scores, dim=-1)
    priorbox = PriorBox(cfg_plate,
                        image_size=(image_h, image_w), phase='test')  # height, width
    priors = priorbox.forward()
    # Follow the device of the network output instead of assuming CUDA, so
    # the priors always live where the tensors they are combined with live.
    priors = priors.to(detection_boxes.device)
    prior_data = priors.data
    boxes = decode(detection_boxes.data.squeeze(0), prior_data, cfg_plate['variance'])
    # Even columns are multiplied by the width, odd columns by the height.
    boxes[:, 0::2] = boxes[:, 0::2] * image_w  # width
    boxes[:, 1::2] = boxes[:, 1::2] * image_h  # height
    boxes = boxes.cpu().numpy()
    scores = detection_scores.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(detection_landmark.data.squeeze(0), prior_data, cfg_plate['variance'])
    landms[:, 0::2] = landms[:, 0::2] * image_w
    landms[:, 1::2] = landms[:, 1::2] * image_h
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > cfg_plate['confidence_threshold'])[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:cfg_plate['top_k']]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, cfg_plate['nms_threshold'])
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:cfg_plate['keep_top_k'], :]
    landms = landms[:cfg_plate['keep_top_k'], :]
    dets = np.concatenate((dets, landms), axis=1)
    return dets
Ejemplo n.º 12
0
def detect_faces(ops, detect_model, img_raw, device):
    """Run ``detect_model`` on one image and return NMS-filtered detections.

    Uses the module-level ``cfg`` for the priors and the decode variance.

    Args:
        ops: Object providing ``confidence_threshold``, ``top_k``,
            ``nms_threshold`` and ``keep_top_k``.
        detect_model: Callable returning ``(loc, conf)`` for the input batch.
        img_raw: Image array laid out as (height, width, channels).
        device: Device the input, the scale and the priors are moved to.

    Returns:
        Float32 array with rows of box columns followed by the score.
    """
    resize = 1
    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height] * 2)

    # Subtract the per-channel constants in place, then go HWC -> 1xCxHxW.
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)
    scale = scale.to(device)

    loc, conf = detect_model(img)  # forward pass

    anchors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
    anchors = anchors.to(device).data
    box_arr = decode(loc.data.squeeze(0), anchors, cfg['variance'])
    box_arr = (box_arr * scale / resize).cpu().numpy()
    score_arr = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Ignore low scores, then keep the top-K best candidates before NMS.
    candidates = np.where(score_arr > ops.confidence_threshold)[0]
    best_first = score_arr[candidates].argsort()[::-1][:ops.top_k]
    ranking = candidates[best_first]
    box_arr = box_arr[ranking]
    score_arr = score_arr[ranking]

    # NMS on rows of (box..., score).
    dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
    dets = dets.astype(np.float32, copy=False)
    survivors = py_cpu_nms(dets, ops.nms_threshold)

    # Cap the number of detections after NMS.
    return dets[survivors, :][:ops.keep_top_k, :]
Ejemplo n.º 13
0
 def set_default_size(self,imgshape=(640,480,3)):#[H,W,nCh]
     """Record the working image size and rebuild the cached priors if needed.

     Nothing happens when the requested height and width equal the stored
     ones and priors are already cached.

     Args:
         imgshape: Sequence unpacked as ``(height, width, channels)``. The
             default is an immutable tuple so it cannot be shared and
             mutated across calls.
     """
     im_height, im_width, im_nch = imgshape
     if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
         return
     self.im_height,self.im_width,self.im_nch = imgshape
     # priorbox shape [-1,4]; dim0: number of predicted bbox from network;
     # dim1: [x_center, y_center, w, h].
     # The priorbox stores the position of each bbox center and the preset
     # minimum face size; the aspect ratio is handled through the variance.
     # All values are relative to the image size and lie in (0, 1).
     priorbox = PriorBox(self.cfg,image_size=(self.im_height,self.im_width))
     self._priors = priorbox.forward()
Ejemplo n.º 14
0
    def get_bbox(self, img_raw):
        """Detect on one image and return integer box tuples.

        Uses the module-level ``cfg`` and fixed thresholds: score > 0.05
        before NMS, NMS threshold 0.3, at most 5000 candidates before and
        750 detections after NMS, and a final score cut at 0.65.

        Args:
            img_raw: Image data laid out as (height, width, channels).

        Returns:
            List of 4-tuples holding the integer-truncated box values of
            every detection whose score is not below 0.65.
        """
        img = torch.FloatTensor(img_raw).to(self.device)
        im_height, im_width, _ = img.size()
        scale = torch.FloatTensor([im_width, im_height] * 2).to(self.device)

        # Subtract the per-channel constants in place, then HWC -> 1xCxHxW.
        img -= torch.FloatTensor((104, 117, 123)).to(self.device)
        img = img.permute(2, 0, 1).unsqueeze(0)

        loc, conf = self.net(img)  # forward pass

        anchors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
        anchors = anchors.to(self.device).data
        box_arr = decode(loc.data.squeeze(0), anchors, cfg['variance'])
        box_arr = (box_arr * scale).cpu().numpy()
        score_arr = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # Ignore low scores, then keep the top-K best candidates before NMS.
        candidates = np.where(score_arr > 0.05)[0]
        best_first = score_arr[candidates].argsort()[::-1][:5000]
        ranking = candidates[best_first]
        box_arr = box_arr[ranking]
        score_arr = score_arr[ranking]

        # NMS on rows of (box..., score), then cap the result.
        dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        survivors = nms(dets, 0.3, force_cpu=False)
        dets = dets[survivors, :][:750, :]

        # Truncate every value of a confident row to int; keep the first four.
        confident = (list(map(int, det)) for det in dets if not det[4] < 0.65)
        return [(row[0], row[1], row[2], row[3]) for row in confident]
Ejemplo n.º 15
0
    def decode_params(self, height, width):
        """Return the cached decode parameters for an image size.

        On a cache miss the parameters are built, stored in
        ``self.decode_param_cache`` under ``(height, width)`` and returned.

        Args:
            height: Image height.
            width: Image width.

        Returns:
            Tuple ``(prior_data, scale, scale1)``: the prior data from
            ``PriorBox``, a tensor of ``[width, height]`` repeated twice and
            a tensor of ``[width, height]`` repeated five times.
        """
        size_key = (height, width)
        cache = self.decode_param_cache

        try:
            return cache[size_key]
        except KeyError:
            pass

        # Cache miss: compute once per distinct image size.
        prior_data = PriorBox(self.cfg, image_size=(height, width)).forward().data
        wh = [width, height]
        params = (prior_data, torch.Tensor(wh * 2), torch.Tensor(wh * 5))
        cache[size_key] = params
        return params
    def facebox_detect(self, img_raw):
        """Detect boxes on one image.

        Thresholds and top-k limits are read from ``self.cfg``.

        Args:
            img_raw: Image array laid out as (height, width, channels).

        Returns:
            Float32 array with rows of box columns followed by the score.
            Rows containing any negative value are dropped.
        """
        img = np.float32(img_raw)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([im_width, im_height] * 2)

        # Subtract the per-channel constants in place, then HWC -> 1xCxHxW.
        img -= (104, 117, 123)
        img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        loc, conf = self.model(img)  # forward pass

        anchors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward()
        anchors = anchors.to(self.device).data
        box_arr = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
        box_arr = (box_arr * scale).cpu().numpy()
        score_arr = conf.data.cpu().numpy()[:, 1]

        # Ignore low scores, then keep the top-K best candidates before NMS.
        candidates = np.where(score_arr > self.cfg['confidence_threshold'])[0]
        best_first = score_arr[candidates].argsort()[::-1][:self.cfg['top_k']]
        ranking = candidates[best_first]
        box_arr = box_arr[ranking]
        score_arr = score_arr[ranking]

        # NMS on rows of (box..., score), then cap the result.
        dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        survivors = nms(dets, self.cfg['nms_threshold'], False)
        boxes_score = dets[survivors, :][:self.cfg['keep_top_k'], :]

        # Discard rows that contain any negative value.
        has_negative = np.array([np.any(row < 0) for row in boxes_score])
        return boxes_score[np.where(np.logical_not(has_negative))]
Ejemplo n.º 17
0
    def face_location(self, img, resize=1, cof=0.5):
        """Return the boxes of detections scoring above ``cof``.

        Uses the module-level ``cfg`` and the thresholds stored on the
        instance. No tensor is moved to a device in this method.

        Args:
            img: Image array laid out as (height, width, channels).
            resize: Scale factor applied to the image with ``cv2.resize``
                when it differs from 1; boxes are divided by it afterwards.
            cof: Score threshold applied after NMS.

        Returns:
            Array with the first four columns (the box values) of every
            kept detection.
        """
        # Prepare the image.
        img = np.float32(img)
        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        scale = torch.Tensor([im_width, im_height] * 2)

        # Subtract the per-channel constants in place, then HWC -> 1xCxHxW.
        img -= (104, 117, 123)
        img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)

        # Forward pass.
        loc, conf = self.net(img)

        anchors = priorbox.forward().data
        box_arr = decode(loc.data.squeeze(0), anchors, cfg['variance'])
        box_arr = (box_arr * scale / resize).cpu().numpy()
        score_arr = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # Ignore low scores, then keep the top-K best candidates before NMS.
        candidates = np.where(score_arr > self.confidence_threshold)[0]
        best_first = score_arr[candidates].argsort()[::-1][:self.top_k]
        ranking = candidates[best_first]
        box_arr = box_arr[ranking]
        score_arr = score_arr[ranking]

        # NMS on rows of (box..., score), then cap the result.
        dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        survivors = nms(dets, self.nms_threshold)
        dets = dets[survivors, :][:self.keep_top_k, :]

        # Keep only the confident detections and drop the score column.
        confident = dets[:, 4] > cof
        return dets[confident, :4]
Ejemplo n.º 18
0
    def detect_faces(self, img, resize=1.0):
        """Detect boxes on one image without modifying the caller's array.

        Uses the module-level ``cfg`` and the thresholds in ``self.args``.

        Args:
            img: Image array laid out as (height, width, channels). It is
                copied to float32 before the per-channel constants are
                subtracted, so integer images are accepted and the input
                is left untouched.
            resize: Factor the scaled boxes are divided by. The image itself
                is not resized here.

        Returns:
            Float32 array with rows of box columns followed by the score.
        """
        # Work on a private float32 copy: the in-place subtraction below
        # would otherwise overwrite the caller's data and would fail outright
        # for integer-typed images.
        img = np.array(img, dtype=np.float32)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        loc, conf = self.net(img)  # forward pass
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # ignore low scores
        inds = np.where(scores > self.args.confidence_threshold)[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.args.top_k]
        boxes = boxes[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = nms(dets, self.args.nms_threshold, force_cpu=self.args.cpu)
        dets = dets[keep, :]

        # keep top-K after NMS
        dets = dets[:self.args.keep_top_k, :]

        return dets
    def nms_process(self, network_output, scale, im_height, im_width) -> List[TrackingRegion]:
        """Turn network outputs into ``TrackingRegion`` objects.

        Uses the module-level ``cfg`` and fixed limits: NMS threshold 0.3,
        at most 5000 candidates before and 750 detections after NMS.

        Args:
            network_output: Sequence of three items unpacked as
                ``(loc, conf, _)``; the third item is also handed to
                ``PriorBox``.
            scale: Tensor multiplied onto the decoded boxes.
            im_height: Image height handed to ``PriorBox``.
            im_width: Image width handed to ``PriorBox``.

        Returns:
            One ``TrackingRegion`` per detection, with its rectangle, its
            confidence and ``data["class_id"]`` set to ``"face"``.
        """
        priors = PriorBox(cfg, network_output[2], (im_height, im_width),
                          phase='test').forward()
        if self.use_gpu:
            priors = priors.cuda()
        loc, conf, _ = network_output

        box_arr = decode(loc.data.squeeze(0), priors.data, cfg['variance'])
        box_arr = (box_arr * scale).cpu().numpy()
        score_arr = conf.data.cpu().numpy()[:, 1]

        # Ignore low scores, then keep at most 5000 candidates before NMS.
        candidates = np.where(score_arr > self.score_min)[0]
        best_first = score_arr[candidates].argsort()[::-1][:5000]
        ranking = candidates[best_first]
        box_arr = box_arr[ranking]
        score_arr = score_arr[ranking]

        # NMS on rows of (box..., score), then cap the result at 750.
        dets = np.hstack((box_arr, score_arr[:, np.newaxis]))
        dets = dets.astype(np.float32, copy=False)
        survivors = nms(dets, 0.3, force_cpu=False)
        dets = dets[survivors, :][:750, :]

        regions = []
        for det in dets:
            face_region = TrackingRegion()
            face_region.set_rect(left=det[0], top=det[1],
                                 right=det[2], bottom=det[3])
            face_region.confidence = det[4]
            face_region.data["class_id"] = "face"
            regions.append(face_region)

        return regions
Ejemplo n.º 20
0
def detect(sample: Union[np.ndarray, Tensor], model: torch.nn.Module,
           cfg: Dict[str, any], device: torch.device) -> List[np.ndarray]:
    """Run the detector over a stack of frames in batches.

    Args:
        sample: Frames whose shape unpacks as
            ``(num_frames, height, width, channels)``.
        model: Callable returning ``(loc, conf, landms)``; the landmarks
            are discarded.
        cfg: Configuration handed to ``PriorBox`` and
            ``postproc_detections``; ``cfg['batch_size']`` is the requested
            batch size before ``adjust_bs`` is applied.
        device: Device the priors, the scale and each batch are moved to.

    Returns:
        The per-batch results of ``postproc_detections`` concatenated into
        one list.
    """
    # NOTE(review): the annotation uses the builtin ``any``; ``typing.Any``
    # was probably intended -- confirm before changing the signature.
    num_frames, height, width, ch = sample.shape
    bs = adjust_bs(cfg['batch_size'], height, width)
    imgs, scale = prepare_imgs(sample)

    priors = PriorBox(cfg, image_size=(height, width)).forward().to(device)
    scale = scale.to(device)

    detections = []
    for start in range(0, num_frames, bs):
        batch = imgs[start:start + bs].to(device)
        with torch.no_grad():
            loc, conf, landms = model(batch)
        # Drop the references that are no longer needed.
        del batch, landms
        detections.extend(postproc_detections(loc, conf, priors, scale, cfg))
        del loc, conf
    return detections
def find_faces(frames: Tensor, model: torch.nn.Module, device: torch.device,
               conf: Dict[str, Any]) -> List[Tensor]:
    """Detect faces in a stack of frames and return square face crops.

    If the average number of detections per frame is below
    ``conf['min_positive_rate']``, the post-processing is re-run with a score
    threshold lowered by ``conf['score_thresh_step']`` each time, for as long
    as the threshold is still >= ``conf['score_thresh_min']`` when checked.

    Args:
        frames: tensor whose ``shape`` unpacks as (D, H, W, C).
        model: network returning ``(locations, confidence, landmarks)``.
        device: device the priors and scale are moved to.
        conf: detector configuration. It is copied before being modified, so
            the caller's dict is never mutated.

    Returns:
        List of crops produced by ``crop_square_torch``; frames contribute at
        most ``max_num_faces(...)`` detections each, and ``None`` crops are
        dropped.
    """
    D, H, W, _ = frames.shape
    frames_orig = frames.permute(0, 3, 1, 2)
    frames, scale = prepare_imgs(frames)
    prior_box = PriorBox(conf, image_size=(H, W))
    priors = prior_box.forward().to(device)
    scale = scale.to(device)

    def count_faces(dets) -> np.ndarray:
        # int64 instead of uint8: a frame with more than 255 detections would
        # wrap around (older NumPy) or raise OverflowError (NumPy 2).
        return np.array([len(d) for d in dets], dtype=np.int64)

    with torch.no_grad():
        locations, confidence, landmarks = model(frames)
        detections = postproc_detections_gpu(locations, confidence, priors,
                                             scale, conf)

    num_faces = count_faces(detections)
    while (num_faces.mean() < conf['min_positive_rate']
           and conf['score_thresh'] >= conf['score_thresh_min']):
        # Work on a copy so the caller's configuration stays untouched.
        conf = dict(conf)
        conf['score_thresh'] -= conf['score_thresh_step']
        detections = postproc_detections_gpu(locations, confidence, priors,
                                             scale, conf)
        num_faces = count_faces(detections)
        logging.debug(
            "Rerun detection postprocessing with score_thresh={:.02f}, "
            "avg_pos_rate={:.02f}".format(conf['score_thresh'],
                                          num_faces.mean()))

    max_faces = max_num_faces(num_faces, conf['max_face_num_thresh'])
    # Release the large intermediates before cropping.
    del locations, confidence, landmarks, priors, prior_box, scale, frames

    faces = []
    for f in range(D):
        for bbox in detections[f][:max_faces]:
            face = crop_square_torch(frames_orig[f], bbox[:4])
            if face is not None:
                faces.append(face)
    del detections, frames_orig
    return faces
Ejemplo n.º 22
0
        print("The image shape is ", img.shape)
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        tic = time.time()
        loc, conf, landms = net(img)  # forward pass

        print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data

        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])

        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
    def Predict(self,
                img_path="test.jpg",
                thresh=0.5,
                out_img_path="result.jpg"):
        """Detect faces in an image file, draw them, and save the result.

        Args:
            img_path: path of the image read with ``cv2.imread``.
            thresh: score threshold. It is used both to drop raw candidates
                before NMS and to decide which detections are drawn and
                returned.
            out_img_path: path the annotated image is written to.

        Returns:
            dict with three parallel lists:
                "scores": detection scores as Python floats,
                "bboxes": ``[x1, y1, x2, y2]`` integer corner coordinates
                    (the same points passed to ``cv2.rectangle``),
                "labels": the score formatted with four decimals.
        """
        image_path = img_path
        confidence_threshold = thresh
        vis_thres = thresh
        nms_threshold = 0.4
        top_k = 1000
        keep_top_k = 750
        save_image = True
        name = out_img_path

        device = self.system_dict["local"]["device"]
        net = self.system_dict["local"]["net"]
        cfg = self.system_dict["local"]["cfg"]

        # The image is fed at its original size, so boxes need no rescaling.
        resize = 1
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        # Per-channel mean subtraction, then HWC -> CHW plus a batch dimension.
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        tic = time.time()
        loc, conf, landms = net(img)  # forward pass
        print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])
        # Five (x, y) landmark pairs share the same width/height scaling.
        scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:keep_top_k, :]
        landms = landms[:keep_top_k, :]

        # Columns: 0-3 box corners, 4 score, 5-14 landmark coordinates.
        dets = np.concatenate((dets, landms), axis=1)

        tmp = {"scores": [], "bboxes": [], "labels": []}

        for det in dets:
            if det[4] < vis_thres:
                continue
            # Capture the score BEFORE any integer conversion; truncating it
            # with int() would turn every score below 1.0 into 0.
            score = float(det[4])
            text = "{:.4f}".format(det[4])
            x1, y1, x2, y2 = (int(v) for v in det[:4])
            cv2.rectangle(img_raw, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.putText(img_raw, text, (x1, y1 + 12),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
            tmp["scores"].append(score)
            tmp["bboxes"].append([x1, y1, x2, y2])
            tmp["labels"].append(text)

        if save_image:
            cv2.imwrite(name, img_raw)

        return tmp
Ejemplo n.º 24
0
class SSD(nn.Module):
    """Single Shot Multibox Architecture
    The network is composed of a base VGG network followed by the
    added multibox conv layers.  Each multibox layer branches into
        1) conv2d for class conf scores
        2) conv2d for localization predictions
        3) associated priorbox layer to produce default bounding
           boxes specific to the layer's feature map size.
    See: https://arxiv.org/pdf/1512.02325.pdf for more details.

    This variant additionally fuses the conv3_3, conv4_3 and conv5_3 feature
    maps (via transposed convolutions) before the multibox heads.

    Args:
        phase: (string) Can be "test" or "train"
        base: VGG16 layers for input, size of either 300 or 500
        extras: extra layers that feed to multibox loc and conf layers
        head: "multibox head" consists of loc and conf conv layers
        num_classes: number of classes scored by the conf layers
    """

    # File extensions accepted by the load_weights* methods.
    _WEIGHT_EXTS = ('.pkl', '.pth')

    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # TODO: implement __call__ in PriorBox
        self.priorbox = PriorBox(v2)

        # NOTE(review): `volatile` is a legacy autograd flag; kept so the
        # block behaves the same on the PyTorch version the file targets.
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = 512

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        # fused conv4_3 and conv5_3
        self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1)
        self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.deconv = nn.ConvTranspose2d(512, 512, 2, 2)
        self.deconv2 = nn.ConvTranspose2d(512, 256, 2, 2)
        self.conv5_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.L2Norm5_3 = L2Norm(512, 10)
        self.L2Norm3_3 = L2Norm(256, 20)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if self.phase == 'test':
            # Softmax is applied to a 2-D (N, num_classes) view in forward();
            # the class axis is named explicitly instead of relying on the
            # deprecated implicit-dim behaviour.
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 300, 0.01, 0.45)

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.
        Args:
            x: input image or batch of images. Shape: [batch,3,300,300].
        Return:
            Depending on phase:
            test:
                Variable(tensor) of output class label predictions,
                confidence score, and corresponding location predictions for
                each object detected. Shape: [batch,topk,7]
            train:
                list of concat outputs from:
                    1: confidence layers, Shape: [batch*num_priors,num_classes]
                    2: localization layers, Shape: [batch,num_priors*4]
                    3: priorbox layers, Shape: [2,num_priors*4]
        """
        sources = list()
        loc = list()
        conf = list()

        # Apply vgg up to conv4_3 relu
        # Fused conv4_3,conv5_3
        for k in range(16):
            x = self.vgg[k](x)
        conv3_3 = self.conv3_3(x)
        s3_3 = self.L2Norm3_3(conv3_3)

        for k in range(16, 23):
            x = self.vgg[k](x)
        conv4_3 = self.conv4_3(x)
        s4_3 = self.L2Norm(conv4_3)

        for k in range(23, 30):
            x = self.vgg[k](x)
        deconv = self.deconv(x)
        conv5_3 = self.conv5_3(deconv)
        s5_3 = self.L2Norm5_3(conv5_3)

        # Top-down fusion: conv5_3 into conv4_3, then that result into conv3_3.
        s2 = F.relu(s4_3 + s5_3)
        s1 = F.relu(s3_3 + self.deconv2(s2))
        sources.extend([s1, s2])

        # apply vgg up to fc7
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # Apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # Apply multibox head to source layers
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == 'test':
            conf_preds = conf.view(-1, self.num_classes)
            conf_preds = self.softmax(conf_preds).view(conf.size(0), -1,
                                                       self.num_classes)
            # TODO: test
            loc = loc.view(loc.size(0), -1, 4)
            output = self.detect(
                loc,  # loc preds
                conf_preds,
                self.priors.type(type(x.data))  # default boxes
            )
        else:
            output = (loc.view(loc.size(0), -1,
                               4), conf.view(conf.size(0), -1,
                                             self.num_classes), self.priors)

        return output

    @staticmethod
    def _has_supported_ext(base_file):
        """Return True when ``base_file`` ends in a supported weight extension."""
        _, ext = os.path.splitext(base_file)
        # The previous check `ext == '.pkl' or '.pth'` was always true because
        # the non-empty string '.pth' is truthy.
        return ext in SSD._WEIGHT_EXTS

    def load_weights(self, base_file):
        """Load a complete state dict from ``base_file`` (.pth or .pkl only).

        Files with any other extension are rejected with a printed message
        and nothing is loaded.
        """
        if not self._has_supported_ext(base_file):
            print("Sorry only .pth or .pkl files supported.")
            return

        print('Loading weights into state dict ...')
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        self.load_state_dict(
            torch.load(base_file,
                       map_location=lambda storage, loc: storage))
        print('Finished!')

    def load_weights_fused(self, base_file):
        """Copy matching parameters for the first 51 state-dict entries.

        Entries missing from the checkpoint and ``extras.7.weight`` are left
        untouched. Only .pth or .pkl files are accepted.
        """
        if not self._has_supported_ext(base_file):
            print("Sorry only .pth or .pkl files supported.")
            return

        print('Loading weights into state dict ...')
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        params = torch.load(base_file,
                            map_location=lambda storage, loc: storage)
        own_dict = self.state_dict()
        for k, v in list(own_dict.items())[:51]:
            param = params.get(k)
            if k == "extras.7.weight" or param is None:
                continue
            v.copy_(param)
        print('Finished!')

    def load_weights_for_rosd(self, base_file):
        """Copy matching parameters for all but the last 28 state-dict entries.

        Entries missing from the checkpoint are left untouched. Only .pth or
        .pkl files are accepted.
        """
        if not self._has_supported_ext(base_file):
            print("Sorry only .pth or .pkl files supported.")
            return

        print('Loading weights into state dict ...')
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        params = torch.load(base_file,
                            map_location=lambda storage, loc: storage)
        own_dict = self.state_dict()
        for k, v in list(own_dict.items())[:-28]:
            param = params.get(k)
            if param is None:
                continue
            v.copy_(param)
        print('Finished!')
def process_frames(
    torched_frames: torch.tensor,
    is_fp16: bool,
    resize_factor: float,
    video_path: Path,
    frame_ids: np.array,
    frames: np.array,
    device: str,
    batch_size: int,
    cfg: dict,
    nms_threshold: float,
    confidence_threshold: float,
    is_save_crops: bool,
    is_save_boxes: bool,
    output_path: Path,
    net: torch.nn.Module,
    min_size: int,
    resize_scale: float,
    keep_top_k: Optional[int],
) -> None:
    """Detect faces in a stack of video frames; optionally save crops/labels.

    Args:
        torched_frames: network input; ``shape[0]`` is the number of frames
            and ``shape[2:]`` is (height, width).
        is_fp16: when true the input is converted with ``.half()``.
        resize_factor: decoded boxes and landmarks are divided by this value.
        video_path: source video; its stem names the output folder/file.
        frame_ids: frame identifier for every entry of ``torched_frames``.
        frames: images the crops are cut from, indexed like
            ``torched_frames`` (assumed to be aligned with it - TODO confirm).
        device: device used for the priors, scales and batches.
        batch_size: number of frames per forward pass.
        cfg: detector configuration (``cfg["variance"]`` is read here).
        nms_threshold: threshold passed to ``nms``.
        confidence_threshold: candidates scoring at or below it are dropped.
        is_save_crops: write face crops under ``output_path / "images"``.
        is_save_boxes: write labels to ``output_path / "labels"``.
        output_path: root output folder; nothing is saved when it is None.
        net: network returning ``(loc, conf, land)`` for a batch.
        min_size: crops narrower or shorter than this are skipped.
        resize_scale: forwarded to ``resize`` as ``resize_coeff``.
        keep_top_k: number of best-scoring candidates kept before NMS
            (``None`` keeps all of them).
    """
    # Saving is only possible when an output folder was given; the same
    # condition gates the writes below so the paths are always bound.
    save_crops = is_save_crops and output_path is not None
    save_boxes = is_save_boxes and output_path is not None

    if save_crops:
        output_image_path = output_path / "images"
        output_image_path.mkdir(exist_ok=True, parents=True)

    if save_boxes:
        output_label_path: Path = output_path / "labels"
        output_label_path.mkdir(exist_ok=True, parents=True)

    if is_fp16:
        torched_frames = torched_frames.half()

    num_frames = torched_frames.shape[0]

    video_id = video_path.stem

    labels: List[dict] = []

    image_height, image_width = torched_frames.shape[2:]

    # Ten values for the landmark coordinates, four for the box corners.
    scale1 = torch.Tensor([image_width, image_height] * 5).to(device)
    scale = torch.Tensor([image_width, image_height] * 2).to(device)

    priorbox = PriorBox(cfg, image_size=(image_height, image_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    for start_index in range(0, num_frames, batch_size):
        end_index = min(start_index + batch_size, num_frames)

        loc, conf, land = net(torched_frames[start_index:end_index].to(device))

        for pred_id in range(loc.shape[0]):
            # Position of this prediction in the full frame stack.
            frame_index = start_index + pred_id
            frame_id = frame_ids[frame_index]

            boxes = decode(loc.data[pred_id], prior_data, cfg["variance"])

            boxes *= scale / resize_factor
            scores = conf[pred_id][:, 1]

            landmarks = decode_landm(land.data[pred_id], prior_data,
                                     cfg["variance"])
            landmarks *= scale1 / resize_factor

            # ignore low scores
            valid_index = torch.where(scores > confidence_threshold)[0]
            boxes = boxes[valid_index]
            landmarks = landmarks[valid_index]
            scores = scores[valid_index]

            order = scores.argsort(descending=True)

            boxes = boxes[order][:keep_top_k, :]
            landmarks = landmarks[order][:keep_top_k, :]
            scores = scores[order][:keep_top_k]

            # do NMS
            keep = nms(boxes, scores, nms_threshold)
            boxes = boxes[keep, :].int()

            landmarks = landmarks[keep].int()

            if boxes.shape[0] == 0:
                continue

            # float64 scalars subclass Python float, so json can encode them.
            scores = scores[keep].cpu().numpy().astype(np.float64)

            for crop_id, bbox in enumerate(boxes):
                bbox = bbox.cpu().numpy().tolist()

                x_min, y_min, x_max, y_max = bbox

                # np.clip returns NumPy integer scalars, which json.dump
                # cannot serialize; convert back to plain ints.
                x_min = int(np.clip(x_min, 0, image_width - 1))
                y_min = int(np.clip(y_min, 0, image_height - 1))

                x_max = int(np.clip(x_max, x_min + 1, image_width - 1))
                y_max = int(np.clip(y_max, y_min + 1, image_height - 1))

                crop_width = x_max - x_min
                crop_height = y_max - y_min

                if crop_width < min_size or crop_height < min_size:
                    continue

                labels += [{
                    "frame_id": int(frame_id),
                    "crop_id": crop_id,
                    "bbox": [x_min, y_min, x_max, y_max],
                    "score": scores[crop_id],
                    "landmarks": landmarks[crop_id].tolist(),
                }]

                if save_crops:
                    x_min, y_min, x_max, y_max = resize(
                        x_min,
                        y_min,
                        x_max,
                        y_max,
                        image_height,
                        image_width,
                        resize_coeff=resize_scale)

                    # Index by the global frame position; `pred_id` alone is
                    # only correct for the first batch.
                    crop = frames[frame_index][y_min:y_max, x_min:x_max]

                    target_folder = output_image_path / f"{video_id}"
                    target_folder.mkdir(exist_ok=True, parents=True)

                    crop_file_path = target_folder / f"{frame_id}_{crop_id}.jpg"

                    # Never overwrite a crop written by an earlier run.
                    if crop_file_path.exists():
                        continue

                    cv2.imwrite(
                        str(crop_file_path),
                        cv2.cvtColor(crop, cv2.COLOR_BGR2RGB),
                        [int(cv2.IMWRITE_JPEG_QUALITY), 90],
                    )

            # Rewritten after every frame that produced detections, so the
            # file always holds the labels gathered so far.
            if save_boxes:
                result = {
                    "file_path": str(video_path),
                    "file_id": video_id,
                    "bboxes": labels,
                }

                with open(output_label_path / f"{video_id}.json", "w") as f:
                    json.dump(result, f, indent=2)
Ejemplo n.º 26
0
    def detection_image(self, image):
        """Detect faces in a single image.

        Args:
            image: image array with 2 or 3 dimensions (height, width[,
                channels]).
                NOTE(review): a 2-D input passes the shape check but the
                later ``transpose(2, 0, 1)`` needs three axes - confirm
                whether grayscale input is really supported.

        Returns:
            A list of ``[score, xmin, ymin, xmax, ymax]`` entries (box values
            as ints) for detections scoring above ``self.yuzhi``, or ``0``
            when ``image`` has neither 2 nor 3 dimensions.
        """
        lst = []

        if len(image.shape) == 3:
            h, w, _ = image.shape
        elif len(image.shape) == 2:
            h, w = image.shape
        else:
            return 0
        # Resize to a fixed width of 256 while keeping the aspect ratio.
        image_resize = cv2.resize(image, (256, int(256 * h / w)))
        image_resize = np.float32(image_resize)
        if len(image_resize.shape) == 3:
            im_height, im_width, _ = image_resize.shape
        elif len(image_resize.shape) == 2:
            im_height, im_width = image_resize.shape

        # Boxes are scaled by the ORIGINAL image size, not the resized one.
        scale = torch.Tensor([w, h, w, h])

        image_resize -= self.mean
        image_resize /= self.val

        img = image_resize.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        out = self.net(img)
        priorbox = PriorBox(self.cfg,
                            out[2], (im_height, im_width),
                            phase="test")
        priors = priorbox.forward()
        priors = priors.to(self.device)
        loc, conf, _ = out
        prior_data = priors.data
        # Use the instance configuration, the same one PriorBox was built
        # from, instead of an unrelated module-level `cfg`.
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale
        boxes = boxes.cpu().numpy()

        scores = conf.data.cpu().numpy()[:, 1]
        inds = np.where(scores > self.confidence_threshold)[0]
        boxes1 = boxes[inds]
        scores = scores[inds]
        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.top_k]
        boxes1 = boxes1[order]
        scores = scores[order]
        dets = np.hstack((boxes1, scores[:, np.newaxis])).astype(np.float32,
                                                                 copy=False)
        keep = nms(dets, self.nms_threshold, force_cpu=self.use_cpu)
        dets = dets[keep, :]
        dets = dets[:self.keep_top_k, :]
        for k in range(dets.shape[0]):
            xmin = dets[k, 0]
            ymin = dets[k, 1]
            xmax = dets[k, 2]
            ymax = dets[k, 3]
            score = dets[k, 4]
            if score > self.yuzhi:
                lst.append([score, int(xmin), int(ymin), int(xmax), int(ymax)])

        return lst
Ejemplo n.º 27
0
def face_detector(frame):
    """Detect faces in ``frame``, try to recognise them, and annotate a copy.

    Relies on module-level state: ``resize``, ``device``, ``net``, ``cfg``,
    ``CONFIDENCE``, ``NMS_THRESHOLD``, ``VIZ_THRESHOLD`` and ``face_data``
    (a mapping with "encodings" and "names").

    Args:
        frame: image array whose ``shape`` unpacks as (height, width,
            channels); it is converted with ``COLOR_BGR2RGB`` before
            recognition.

    Returns:
        ``(img_raw, bboxs)``: an annotated copy of ``frame`` and a list of
        ``[x_a, y_a, x_b, y_b]`` boxes (detections grown by a 5% margin).
    """
    img_raw = frame.copy()
    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    # Per-channel mean subtraction, then HWC -> CHW plus a batch dimension.
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # Five (x, y) landmark pairs share the same width/height scaling.
    scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > CONFIDENCE)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:5000]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, NMS_THRESHOLD)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:750, :]
    landms = landms[:750, :]

    dets = np.concatenate((dets, landms), axis=1)

    img_h, img_w, _ = frame.shape
    margin_percent = 5

    bboxs = []
    for det in dets:
        if det[4] < VIZ_THRESHOLD:
            continue

        x1, y1, x2, y2 = (int(v) for v in det[:4])

        # Grow the box by a margin proportional to its shorter side, then
        # shift it back inside the frame when it sticks out.
        w = x2 - x1
        h = y2 - y1
        margin = int(min(w, h) * margin_percent / 100)
        x_a = x1 - margin
        y_a = y1 - margin
        x_b = x1 + w + margin
        y_b = y1 + h + margin
        if x_a < 0:
            x_b = min(x_b - x_a, img_w - 1)
            x_a = 0
        if y_a < 0:
            y_b = min(y_b - y_a, img_h - 1)
            y_a = 0
        if x_b > img_w:
            x_a = max(x_a - (x_b - img_w), 0)
            x_b = img_w
        if y_b > img_h:
            y_a = max(y_a - (y_b - img_h), 0)
            y_b = img_h

        face = frame[y_a:y_b, x_a:x_b]
        if face.size == 0:
            # Degenerate box: nothing to convert or recognise.
            continue

        name = ""
        rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
        # The location must be given in the coordinates of the image that is
        # passed in. `rgb` is already the crop, so the face spans the whole
        # image: (top, right, bottom, left) = (0, width, height, 0).
        face_h, face_w = rgb.shape[:2]
        encodings = face_recognition.face_encodings(rgb,
                                                    [(0, face_w, face_h, 0)])
        if encodings:
            matches = face_recognition.compare_faces(face_data["encodings"],
                                                     encodings[0],
                                                     tolerance=0.55)
            if True in matches:
                # Majority vote over all known encodings that matched.
                counts = {}
                for idx, matched in enumerate(matches):
                    if matched:
                        candidate = face_data["names"][idx]
                        counts[candidate] = counts.get(candidate, 0) + 1

                name = max(counts, key=counts.get)
        cv2.putText(img_raw, name, (x_a + 10, y_a), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (255, 0, 255), 1, cv2.LINE_AA)
        cv2.rectangle(img_raw, (x_a, y_a), (x_b, y_b), (255, 0, 0), 1)
        bboxs.append([x_a, y_a, x_b, y_b])

    return img_raw, bboxs
Ejemplo n.º 28
0
    def detect_faces(self, img_raw):
        """Run the detector on one image and return face boxes.

        Args:
            img_raw: image array whose ``shape`` unpacks as (height, width,
                channels).

        Returns:
            List of integer 4-tuples built from detection columns
            ``(1, 2, 3, 0)``. Assuming the decoded rows start with
            (left, top, right, bottom), that is the (top, right, bottom,
            left) order noted for "fr" in the original comment.
        """
        pixels = np.float32(img_raw)
        im_height, im_width, _ = pixels.shape

        # Scales for the four box corners and the ten landmark coordinates.
        box_scale = torch.Tensor([im_width, im_height] * 2)
        landm_scale = torch.Tensor([im_width, im_height] * 5)

        # Mean subtraction, HWC -> CHW, then add the batch dimension.
        pixels -= (104, 117, 123)
        batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0)
        if self.on_gpu:
            batch = batch.to(self.device)
            box_scale = box_scale.to(self.device)

        started = time.time()
        loc, conf, landms = self.detector(batch)  # forward pass
        print('net forward time: {:.4f}'.format(time.time() - started))

        priors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward()
        if self.on_gpu:
            priors = priors.to(self.device)
            landm_scale = landm_scale.to(self.device)
        prior_data = priors.data
        variance = self.cfg['variance']

        decoded_boxes = decode(loc.data.squeeze(0), prior_data, variance)
        boxes = (decoded_boxes * box_scale / self.resize).cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        decoded_landms = decode_landm(landms.data.squeeze(0), prior_data,
                                      variance)
        landms = (decoded_landms * landm_scale / self.resize).cpu().numpy()

        # Drop candidates at or below the confidence threshold.
        confident = scores > self.confidence_threshold
        boxes, landms, scores = boxes[confident], landms[confident], scores[confident]

        # Best-first ordering, truncated to top_k before NMS.
        ranking = scores.argsort()[::-1][:self.top_k]
        boxes, landms, scores = boxes[ranking], landms[ranking], scores[ranking]

        # Non-maximum suppression on [box, score] rows.
        candidates = np.hstack((boxes, scores.reshape(-1, 1)))
        candidates = candidates.astype(np.float32, copy=False)
        survivors = py_cpu_nms(candidates, self.nms_threshold)

        # Cap the number of detections kept after NMS.
        limit = self.keep_top_k
        dets = candidates[survivors, :][:limit, :]
        landms = landms[survivors][:limit, :]
        dets = np.concatenate((dets, landms), axis=1)

        return [(int(row[1]), int(row[2]), int(row[3]), int(row[0]))
                for row in dets]
Ejemplo n.º 29
0
def do_inference(net, img_raw):
    """Run ``net`` on one image and, if requested, draw the detections.

    Reads the module-level ``device``, ``cfg``, ``resize`` and ``args``.
    When ``args.save_image`` is set, boxes, scores and the five landmark
    points of every detection scoring at least ``args.vis_thres`` are drawn
    onto ``img_raw`` in place. Nothing is returned.

    Args:
        net: network returning ``(loc, conf, landms)`` for a batch.
        img_raw: image array whose ``shape`` unpacks as (height, width,
            channels).
    """
    pixels = np.float32(img_raw)
    im_height, im_width, _ = pixels.shape

    # Scales for the four box corners and the ten landmark coordinates.
    box_scale = torch.Tensor([im_width, im_height] * 2).to(device)

    # Mean subtraction, HWC -> CHW, then add the batch dimension.
    pixels -= (104, 117, 123)
    batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0).to(device)

    started = time.time()
    loc, conf, landms = net(batch)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - started))

    prior_data = PriorBox(cfg, image_size=(im_height,
                                           im_width)).forward().to(device).data
    variance = cfg['variance']

    boxes = decode(loc.data.squeeze(0), prior_data, variance)
    boxes = (boxes * box_scale / resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, variance)
    landm_scale = torch.Tensor([im_width, im_height] * 5).to(device)
    landms = (landms * landm_scale / resize).cpu().numpy()

    # Drop candidates at or below the confidence threshold.
    confident = scores > args.confidence_threshold
    boxes, landms, scores = boxes[confident], landms[confident], scores[confident]

    # Best-first ordering, truncated to top_k before NMS.
    ranking = scores.argsort()[::-1][:args.top_k]
    boxes, landms, scores = boxes[ranking], landms[ranking], scores[ranking]

    # Non-maximum suppression on [box, score] rows.
    candidates = np.hstack((boxes, scores.reshape(-1, 1)))
    candidates = candidates.astype(np.float32, copy=False)
    survivors = py_cpu_nms(candidates, args.nms_threshold)

    # Cap the number of detections kept after NMS.
    dets = candidates[survivors, :][:args.keep_top_k, :]
    landms = landms[survivors][:args.keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    if not args.save_image:
        return

    # One colour per landmark, in the column order of `dets` (5..14).
    landmark_colors = (
        (0, 0, 255),
        (0, 255, 255),
        (255, 0, 255),
        (0, 255, 0),
        (255, 0, 0),
    )
    for det in dets:
        if det[4] < args.vis_thres:
            continue
        text = "{:.4f}".format(det[4])
        coords = [int(value) for value in det]
        cv2.rectangle(img_raw, (coords[0], coords[1]), (coords[2], coords[3]),
                      (0, 0, 255), 2)
        cv2.putText(img_raw, text, (coords[0], coords[1] + 12),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

        # landms
        for point, color in enumerate(landmark_colors):
            x_col = 5 + 2 * point
            cv2.circle(img_raw, (coords[x_col], coords[x_col + 1]), 1, color,
                       4)
Ejemplo n.º 30
0
def main():
    """Run the face detector over a dataset's image list and save the results.

    Command-line options come from ``get_args()``. The image names are read
    from ``data/<dataset>/img_list.txt`` and each image is loaded from
    ``data/<dataset>/images/<name>.jpg``. For every image the network output
    is decoded against the prior boxes, filtered by
    ``args.confidence_threshold`` and reduced with ``py_cpu_nms``.

    Side effects:
        * ``<save_folder>/<dataset>_dets.txt`` is always created; detection
          rows are written to it only when ``args.dataset == "FDDB"``.
        * When ``args.save_image`` is set, each image with its boxes and
          landmarks drawn on it is written to ``./results/<index>.jpg``.
        * Per-image timing information is printed to stdout.

    Raises:
        ValueError: if ``args.network`` is not one of the supported names.
        FileNotFoundError: if an image listed in the dataset cannot be read.
    """
    args = get_args()
    torch.set_grad_enabled(False)

    # Fail early on an unknown backbone: a ``None`` config would otherwise
    # only surface later when ``cfg["variance"]`` is indexed.
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        raise ValueError("Unsupported network: {!r}".format(args.network))

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print("Finished loading model!")
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    args.save_folder.mkdir(exist_ok=True)

    # testing dataset
    testset_folder = os.path.join("data", args.dataset, "images/")
    testset_list = os.path.join("data", args.dataset, "img_list.txt")
    with open(testset_list, "r") as fr:
        test_dataset = fr.read().split()
    num_images = len(test_dataset)

    # testing scale
    resize = 1

    _t = {"forward_pass": Timer(), "misc": Timer()}

    # The output directory for annotated images does not change between
    # iterations, so create it once instead of checking on every image.
    if args.save_image:
        os.makedirs("./results/", exist_ok=True)

    dets_path = os.path.join(args.save_folder, args.dataset + "_dets.txt")
    # The context manager guarantees the detections file is closed even if
    # processing one of the images raises.
    with open(dets_path, "w") as fw:
        # testing begin
        for i, img_name in enumerate(test_dataset):
            image_path = testset_folder + img_name + ".jpg"
            img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if img_raw is None:
                # cv2.imread signals failure by returning None, not raising.
                raise FileNotFoundError(
                    "Could not read image: {}".format(image_path))

            img = np.float32(img_raw)
            if resize != 1:
                img = cv2.resize(img,
                                 None,
                                 None,
                                 fx=resize,
                                 fy=resize,
                                 interpolation=cv2.INTER_LINEAR)
            im_height, im_width, _ = img.shape
            # (x, y, x, y) multipliers applied to the decoded boxes.
            scale = torch.Tensor([im_width, im_height, im_width, im_height])
            img -= (104, 117, 123)
            # HWC -> CHW, then add a leading batch dimension.
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)
            scale = scale.to(device)

            _t["forward_pass"].tic()
            loc, conf, landms = net(img)  # forward pass
            _t["forward_pass"].toc()

            _t["misc"].tic()
            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data,
                                  cfg["variance"])
            # Five (x, y) pairs; after the transpose above img.shape[3] is
            # the width and img.shape[2] the height.
            scale1 = torch.Tensor([im_width, im_height] * 5)
            scale1 = scale1.to(device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # Sort by descending score. No top-K truncation is applied
            # before NMS: every detection above the threshold is kept.
            order = scores.argsort()[::-1]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)

            # All NMS survivors are kept (no keep_top_k truncation).
            dets = dets[keep, :]
            landms = landms[keep]

            # Columns: 4 box coordinates, score, then 10 landmark values.
            dets = np.concatenate((dets, landms), axis=1)
            _t["misc"].toc()

            # save dets
            if args.dataset == "FDDB":
                fw.write("{:s}\n".format(img_name))
                fw.write("{:.1f}\n".format(dets.shape[0]))
                for k in range(dets.shape[0]):
                    xmin = dets[k, 0]
                    ymin = dets[k, 1]
                    xmax = dets[k, 2]
                    ymax = dets[k, 3]
                    score = dets[k, 4]
                    w = xmax - xmin + 1
                    h = ymax - ymin + 1
                    fw.write("{:d} {:d} {:d} {:d} {:.10f}\n".format(
                        int(xmin), int(ymin), int(w), int(h), score))
            print("im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s".
                  format(i + 1, num_images, _t["forward_pass"].average_time,
                         _t["misc"].average_time))

            # show image
            if args.save_image:
                for b in dets:
                    if b[4] < args.vis_thres:
                        continue
                    # Format the score before the row is truncated to ints.
                    text = "{:.4f}".format(b[4])
                    b = list(map(int, b))
                    cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]),
                                  (0, 0, 255), 2)
                    cx = b[0]
                    cy = b[1] + 12
                    cv2.putText(img_raw, text, (cx, cy),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                    # landms
                    cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                    cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                    cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                    cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                    cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
                # save image
                name = "./results/" + str(i) + ".jpg"
                cv2.imwrite(name, img_raw)