Example #1
0
 def __init__(self, net='mtcnn', type='cuda'):
     """Build the three stage networks and place them on one device.

     Args:
         net: Value stored unchanged as ``self.net``.
         type: Device specifier handed to ``torch.device``.
     """
     # Set the benchmark flag on the cudnn module before any network is built.
     cudnn.benchmark = True
     self.net = net

     target = torch.device(type)
     self.device = target

     # Each stage network is constructed and then moved onto the same device.
     self.pnet = PNet().to(target)
     self.rnet = RNet().to(target)
     self.onet = ONet().to(target)
Example #2
0
    def create_mtcnn_net(self):
        """Create the MTCNN stage networks and store them on the instance.

        Reads the checkpoint paths ``self.args.pnet``, ``self.args.rnet`` and
        ``self.args.onet``. A stage whose path is empty or ``None`` is skipped
        and its detector attribute is set to ``None``. Every stage that is
        built is switched to eval mode.

        When ``self.use_gpu`` is true the checkpoint is loaded as-is and the
        network is wrapped in ``torch.nn.DataParallel`` over ``self.gpu_ids``;
        otherwise the checkpoint storages are kept on the CPU while loading.

        Sets ``self.pnet_detector``, ``self.rnet_detector`` and
        ``self.onet_detector``. They are assigned together at the end, so a
        failed load leaves none of them updated.
        """

        def load_stage(build_net, weights_path):
            # Build one stage from its checkpoint; None when no path is given.
            if not weights_path:
                return None
            net = build_net()
            if self.use_gpu:
                net.load_state_dict(torch.load(weights_path))
                net = torch.nn.DataParallel(net, device_ids=self.gpu_ids)
            else:
                # Returning the storage unchanged keeps every tensor on the
                # CPU, so no CUDA device is needed to read the checkpoint.
                net.load_state_dict(
                    torch.load(weights_path,
                               map_location=lambda storage, loc: storage))
            net.eval()
            return net

        # Factories defer construction until a checkpoint path is known.
        pnet = load_stage(lambda: PNet(use_cuda=self.use_gpu), self.args.pnet)
        rnet = load_stage(lambda: RNet(use_cuda=self.use_gpu), self.args.rnet)
        onet = load_stage(lambda: ONet(use_cuda=self.use_gpu), self.args.onet)

        self.pnet_detector = pnet
        self.rnet_detector = rnet
        self.onet_detector = onet
Example #3
0
def create_mtcnn_net(p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True):
    """
    Load the MTCNN stage networks from their checkpoint files.

    A stage is built only when its path is not None. With ``use_cuda``
    truthy (the default) the path is printed, the weights are loaded and the
    network is moved to the GPU with ``.cuda()``; otherwise the weights are
    loaded with their storages kept on the CPU. Every network that is built
    is put into eval mode.

    Returns a ``(pnet, rnet, onet)`` tuple; skipped stages are None.
    """

    def _restore(make_net, weights_path, banner):
        # Build a single stage, or hand back None when it was not requested.
        if weights_path is None:
            return None
        net = make_net()
        if not use_cuda:
            state = torch.load(weights_path, map_location=lambda storage, loc: storage)
            net.load_state_dict(state)
        else:
            print(banner.format(weights_path))
            net.load_state_dict(torch.load(weights_path))
            net.cuda()
        net.eval()
        return net

    pnet = _restore(lambda: PNet(use_cuda=use_cuda), p_model_path, 'p_model_path:{0}')
    rnet = _restore(lambda: RNet(use_cuda=use_cuda), r_model_path, 'r_model_path:{0}')
    onet = _restore(lambda: ONet(use_cuda=use_cuda), o_model_path, 'o_model_path:{0}')

    return pnet, rnet, onet
Example #4
0
    """
    for b in bounding_boxes:
        cv2.rectangle(img, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])),
                      (255, 255, 255), 1)

    for p in facial_landmarks:
        for i in range(5):
            cv2.circle(img, (int(p[i]), int(p[i + 5])), 1, (0, 255, 0), -1)

    return img


# LOAD MODELS
# The three stage networks are built at import time and moved onto `device`
# (`device` is defined outside this excerpt).
pnet = PNet().to(device)
rnet = RNet().to(device)
onet = ONet().to(device)

if __name__ == '__main__':
    video_src = './mtcnn/video/1.mp4'  # video source
    # video_src = 0 # camera device id
    capture = cv2.VideoCapture(video_src)
    if not capture.isOpened():
        print('Camera is not opened!')
    else:
        # Counter used to skip frames: only every second frame read gets past
        # the modulo check below, after which the counter is reset.
        idx_frame = 0
        while True:
            # NOTE(review): `ret` is not checked in the visible part of the
            # loop; confirm a failed read / end of video is handled further
            # down, otherwise this loop has no exit.
            ret, frame = capture.read()
            idx_frame += 1
            if idx_frame % 2 != 0:
                continue
            idx_frame = 0
Example #5
0
def detect_faces(image,
                 min_face_size=20.0,
                 thresholds=(0.6, 0.7, 0.8),
                 nms_thresholds=(0.7, 0.7, 0.7)):
    """Detect faces and landmarks with the P-Net / R-Net / O-Net cascade.

    Arguments:
        image: an instance of PIL.Image.
        min_face_size: a float number; the image is rescaled so that this
            size maps onto the 12-pixel minimum detection size.
        thresholds: a sequence of length 3, the score threshold applied
            after each of the three stages.
        nms_thresholds: a sequence of length 3, the overlap threshold passed
            to ``nms`` after each of the three stages.
    Returns:
        two float numpy arrays, bounding boxes and facial landmarks. The
        landmarks have shape [n_boxes, 10]: columns 0-4 are scaled by box
        width and 5-9 by box height. Column 4 of the boxes holds the O-Net
        score (presumably [n_boxes, 5] overall; depends on ``calibrate_box``).
        ``([], [])`` is returned when no candidate is found at stage 1,
        none passes the stage-2 threshold, or none reaches stage 3.
    """

    with torch.no_grad():
        # LOAD MODELS
        # NOTE(review): the networks are rebuilt on every call and only onet
        # is switched to eval mode; confirm pnet/rnet have no train-only
        # layers and consider reusing already-built models.
        pnet = PNet().to(device)
        rnet = RNet().to(device)
        onet = ONet().to(device)
        onet.eval()

        # BUILD AN IMAGE PYRAMID
        image_width, image_height = image.size
        min_length = min(image_height, image_width)

        min_detection_size = 12
        factor = 0.707  # sqrt(0.5)

        # scales the image so that
        # minimum size that we can detect equals to
        # minimum face size that we want to detect
        m = min_detection_size / min_face_size
        min_length *= m

        # scales for scaling the image
        scales = []
        factor_count = 0
        while min_length > min_detection_size:
            scales.append(m * factor**factor_count)
            min_length *= factor
            factor_count += 1

        # STAGE 1

        # run P-Net on different scales
        per_scale = [
            run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
            for s in scales
        ]

        # collect boxes (and offsets, and scores) from different scales
        per_scale = [boxes for boxes in per_scale if boxes is not None]
        if not per_scale:
            # np.vstack raises on an empty sequence; nothing was detected
            # (or the image is too small for any scale), so stop here.
            return [], []
        bounding_boxes = np.vstack(per_scale)

        keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
        bounding_boxes = bounding_boxes[keep]

        # use offsets predicted by pnet to transform bounding boxes
        bounding_boxes = calibrate_box(bounding_boxes[:, 0:5],
                                       bounding_boxes[:, 5:])
        # shape [n_boxes, 5]

        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 2

        img_boxes = get_image_boxes(bounding_boxes, image, size=24)
        img_boxes = torch.FloatTensor(img_boxes).to(device)
        output = rnet(img_boxes)
        offsets = output[0].detach().cpu().numpy()  # shape [n_boxes, 4]
        probs = output[1].detach().cpu().numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[1])[0]
        if len(keep) == 0:
            # Every candidate was rejected; same result the stage-3 emptiness
            # check would give, without pushing empty arrays through helpers.
            return [], []
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
        offsets = offsets[keep]

        keep = nms(bounding_boxes, nms_thresholds[1])
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 3

        img_boxes = get_image_boxes(bounding_boxes, image, size=48)
        if len(img_boxes) == 0:
            return [], []
        img_boxes = torch.FloatTensor(img_boxes).to(device)
        output = onet(img_boxes)
        landmarks = output[0].detach().cpu().numpy()  # shape [n_boxes, 10]
        offsets = output[1].detach().cpu().numpy()  # shape [n_boxes, 4]
        probs = output[2].detach().cpu().numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[2])[0]
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
        offsets = offsets[keep]
        landmarks = landmarks[keep]

        # compute landmark points: scale the network output by the box size
        # and shift it by the box's top-left corner
        box_width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
        box_height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
        xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
        landmarks[:, 0:5] = np.expand_dims(
            xmin, 1) + np.expand_dims(box_width, 1) * landmarks[:, 0:5]
        landmarks[:, 5:10] = np.expand_dims(
            ymin, 1) + np.expand_dims(box_height, 1) * landmarks[:, 5:10]

        bounding_boxes = calibrate_box(bounding_boxes, offsets)
        keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
        bounding_boxes = bounding_boxes[keep]
        landmarks = landmarks[keep]

        return bounding_boxes, landmarks