Example #1
0
    def __init__(self,
                 pnet_param="../models/p_net.pth",
                 rnet_param="../models/r_net.pth",
                 onet_param="../models/o_net.pth"):
        """Build the three MTCNN sub-networks and load their trained weights.

        :param pnet_param: path to the P-Net state-dict file
        :param rnet_param: path to the R-Net state-dict file
        :param onet_param: path to the O-Net state-dict file
        """
        self.pnet = PNet()
        self.rnet = RNet()
        self.onet = ONet()

        # Load each network's weights and switch it to inference mode
        # (eval() disables dropout / batch-norm training behaviour).
        for net, param_path in ((self.pnet, pnet_param),
                                (self.rnet, rnet_param),
                                (self.onet, onet_param)):
            net.load_state_dict(torch.load(param_path))
            net.eval()

        # Normalisation constants presumably match the training-set
        # statistics — TODO confirm against the training pipeline.
        self.__img_transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5327, 0.4363, 0.3878),
                                 (0.3018, 0.2817, 0.2800))
        ])
Example #2
0
from Nets.mtcnn_net import PNet
import os
from Trainers import trainer

if __name__ == '__main__':
    # Train P-Net, the 12x12 proposal network of the MTCNN cascade.
    pnet = PNet()
    # exist_ok=True replaces the racy os.path.exists()/os.makedirs()
    # check-then-create pair of the original.
    os.makedirs("../models", exist_ok=True)
    train = trainer.Trainer(pnet, '../models/p_net.pth', r"E:\FaceDetectionMTCNNV2\datasets\12")
    train.train(0.01, alpha=0.9)
Example #3
0
class Detector:
    """MTCNN cascade face detector (P-Net -> R-Net -> O-Net).

    P-Net proposes candidate windows over an image pyramid, R-Net
    re-scores and refines them, and O-Net produces the final calibrated
    boxes. Boxes are ``[x1, y1, x2, y2, confidence]`` rows.
    """

    def __init__(self,
                 pnet_param="../models/p_net.pth",
                 rnet_param="../models/r_net.pth",
                 onet_param="../models/o_net.pth"):
        """Build the three sub-networks and load their trained weights.

        :param pnet_param: path to the P-Net state-dict file
        :param rnet_param: path to the R-Net state-dict file
        :param onet_param: path to the O-Net state-dict file
        """
        self.pnet = PNet()
        self.rnet = RNet()
        self.onet = ONet()

        self.pnet.load_state_dict(torch.load(pnet_param))
        self.rnet.load_state_dict(torch.load(rnet_param))
        self.onet.load_state_dict(torch.load(onet_param))

        # Inference only: disable dropout / batch-norm training behaviour.
        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()

        # Normalisation constants presumably match the training-set
        # statistics — TODO confirm against the training pipeline.
        self.__img_transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5327, 0.4363, 0.3878),
                                 (0.3018, 0.2817, 0.2800))
        ])

    def face_detect(self, image):
        """Run the full P -> R -> O cascade on a PIL image.

        :param image: PIL.Image to scan for faces
        :return: ndarray of ``[x1, y1, x2, y2, confidence]`` rows, or an
            empty array when any stage yields no candidates
        """
        pstart_time = time.time()
        pnet_boxes = self.__pnet_detect(image)
        if pnet_boxes.shape[0] == 0:
            return np.array([])
        pend_time = time.time()
        p_time = pend_time - pstart_time

        rstart_time = time.time()
        rnet_boxes = self.__rnet_detect(image, pnet_boxes)
        if rnet_boxes.shape[0] == 0:
            return np.array([])
        rend_time = time.time()
        r_time = rend_time - rstart_time

        ostart_time = time.time()
        onet_boxes = self.__onet_detect(image, rnet_boxes)
        if onet_boxes.shape[0] == 0:
            return np.array([])
        oend_time = time.time()
        o_time = oend_time - ostart_time

        time_sum = p_time + r_time + o_time
        # Fixed typo in the log message: "totle" -> "total".
        print("total time:{0}, p_time:{1}, r_time:{2}, o_time:{3}".format(
            time_sum, p_time, r_time, o_time))
        return onet_boxes

    def __pnet_detect(self, image):
        """P-Net stage: scan an image pyramid for candidate windows.

        :param image: PIL.Image (resized in place per pyramid level)
        :return: candidate boxes after NMS, or an empty array
        """
        img_w, img_h = image.size
        min_side_len = min(img_w, img_h)
        scale = 1
        boxes = []
        # Keep shrinking until the short side no longer fits P-Net's
        # 12x12 input window.
        while min_side_len > 12:
            img_data = self.__img_transforms(image)
            img_data = img_data.unsqueeze(0)  # add batch dimension

            # no_grad: inference only — also required so the outputs can
            # later be converted to numpy without autograd errors.
            with torch.no_grad():
                _classify, _offset = self.pnet(img_data)
            classify, offset = _classify[0][0], _offset[0]
            # Keep every feature-map cell with confidence > 0.6.
            indexes = torch.nonzero(torch.gt(classify, 0.6))
            for ids in indexes:
                boxes.append(
                    self.__box(ids, offset, classify[ids[0], ids[1]], scale))
            # Shrink by ~1/sqrt(2) per pyramid level.
            scale *= 0.709
            _weight = int(img_w * scale)
            _height = int(img_h * scale)
            image = image.resize((_weight, _height))
            min_side_len = np.minimum(_weight, _height)
        if not boxes:
            # np.stack([]) raises ValueError — report "no candidates".
            return np.array([])
        return nms(np.stack(boxes), 0.3, False)

    def __box(self, start_index, offset, cls, scale, stride=2, side_len=12):
        """Map one P-Net feature-map cell back to an image-space box.

        :param start_index: (row, col) index of the cell in the feature map
        :param offset: regression offsets for the whole feature map
        :param cls: face confidence of this cell
        :param scale: pyramid scale the feature map was computed at
        :param stride: P-Net's effective output stride
        :param side_len: P-Net's input window size
        :return: ``[x1, y1, x2, y2, cls]``
        """
        # Receptive window of the cell in original-image coordinates.
        _x1 = int(start_index[1] * stride) / scale
        _y1 = int(start_index[0] * stride) / scale
        _x2 = int(start_index[1] * stride + side_len) / scale
        _y2 = int(start_index[0] * stride + side_len) / scale

        ow = _x2 - _x1
        oh = _y2 - _y1

        # Regression offsets are relative to the window size.
        _offset = offset[:, start_index[0], start_index[1]]
        x1 = _x1 + ow * _offset[0]
        y1 = _y1 + oh * _offset[1]
        x2 = _x2 + ow * _offset[2]
        y2 = _y2 + oh * _offset[3]

        return [x1, y1, x2, y2, cls]

    def __rnet_detect(self, image, pnet_boxes):
        """R-Net stage: re-score and refine P-Net candidate boxes.

        :param image: original PIL image
        :param pnet_boxes: candidate boxes from the P-Net stage
        :return: refined boxes after NMS, or an empty array
        """
        img_dataset = []
        pnet_boxes = convert_to_square(pnet_boxes)
        for box in pnet_boxes:
            crop_x1 = int(box[0])
            crop_y1 = int(box[1])
            crop_x2 = int(box[2])
            crop_y2 = int(box[3])  # fixed typo: was "cropy_y2"

            img = image.crop((crop_x1, crop_y1, crop_x2, crop_y2))
            img = img.resize((24, 24))
            imgdata = self.__img_transforms(img)
            img_dataset.append(imgdata)
        img_dataset = torch.stack(img_dataset)
        # no_grad: without it, .numpy() below raises on tensors that
        # require grad.
        with torch.no_grad():
            classify, offset = self.rnet(img_dataset)
        classify, offset = classify.numpy(), offset.numpy()

        boxes = []
        # Keep candidates with confidence > 0.7 and apply their offsets.
        indexes, _ = np.where(classify > 0.7)
        for idx in indexes:
            _box = pnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            ow = _x2 - _x1
            oh = _y2 - _y1
            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]
            cls = classify[idx][0]
            boxes.append([x1, y1, x2, y2, cls])

        if not boxes:
            return np.array([])
        return nms(np.array(boxes), 0.3)

    def __onet_detect(self, image, rnet_boxes):
        """O-Net stage: produce the final calibrated boxes.

        :param image: original PIL image
        :param rnet_boxes: candidate boxes from the R-Net stage
        :return: final boxes after min-mode NMS, or an empty array
        """
        img_dataset = []
        rnet_boxes = convert_to_square(rnet_boxes)
        for box in rnet_boxes:
            crop_x1 = int(box[0])
            crop_y1 = int(box[1])
            crop_x2 = int(box[2])
            crop_y2 = int(box[3])

            img = image.crop((crop_x1, crop_y1, crop_x2, crop_y2))
            img = img.resize((48, 48))
            img_data = self.__img_transforms(img)
            img_dataset.append(img_data)
        img_dataset = torch.stack(img_dataset)
        with torch.no_grad():
            classify, offset = self.onet(img_dataset)
        classify, offset = classify.numpy(), offset.numpy()
        boxes = []
        # Stricter confidence threshold for the final stage.
        indexes, _ = np.where(classify > 0.9)
        for idx in indexes:
            _box = rnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            ow = _x2 - _x1
            oh = _y2 - _y1
            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]
            cls = classify[idx][0]
            boxes.append([x1, y1, x2, y2, cls])
        if not boxes:
            return np.array([])
        return nms(np.array(boxes), 0.3, isMin=True)
Example #4
0
class Detector:
    """MTCNN cascade face detector (P-Net -> R-Net -> O-Net), vectorised.

    Same cascade as the loop-based detector, but P-Net cell decoding and
    the R/O-stage box refinement operate on whole index arrays at once,
    and soft-NMS replaces hard NMS. Boxes are ``[x1, y1, x2, y2,
    confidence]`` rows.
    """

    def __init__(self,
                 pnet_param="../models/p_net.pth",
                 rnet_param="../models/r_net.pth",
                 onet_param="../models/o_net.pth"):
        """Build the three sub-networks and load their trained weights.

        :param pnet_param: path to the P-Net state-dict file
        :param rnet_param: path to the R-Net state-dict file
        :param onet_param: path to the O-Net state-dict file
        """
        self.pnet = PNet()
        self.rnet = RNet()
        self.onet = ONet()

        self.pnet.load_state_dict(torch.load(pnet_param))
        self.rnet.load_state_dict(torch.load(rnet_param))
        self.onet.load_state_dict(torch.load(onet_param))

        # Inference only: disable dropout / batch-norm training behaviour.
        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()

        # Normalisation constants presumably match the training-set
        # statistics — TODO confirm against the training pipeline.
        self.__img_transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5327, 0.4363, 0.3878),
                                 (0.3018, 0.2817, 0.2800))
        ])

    def face_detect(self, image):
        """Run the full P -> R -> O cascade on a PIL image.

        :param image: PIL.Image to scan for faces
        :return: ndarray of ``[x1, y1, x2, y2, confidence]`` rows, or an
            empty array when any stage yields no candidates
        """
        pstart_time = time.time()
        pnet_boxes = self.__pnet_detect(image)
        if pnet_boxes.shape[0] == 0:
            return np.array([])
        pend_time = time.time()
        p_time = pend_time - pstart_time

        rstart_time = time.time()
        rnet_boxes = self.__rnet_detect(image, pnet_boxes)
        if rnet_boxes.shape[0] == 0:
            return np.array([])
        rend_time = time.time()
        r_time = rend_time - rstart_time

        ostart_time = time.time()
        onet_boxes = self.__onet_detect(image, rnet_boxes)
        if onet_boxes.shape[0] == 0:
            return np.array([])
        oend_time = time.time()
        o_time = oend_time - ostart_time

        time_sum = p_time + r_time + o_time
        # Fixed typo in the log message: "totle" -> "total".
        print("total time:{0}, p_time:{1}, r_time:{2}, o_time:{3}".format(
            time_sum, p_time, r_time, o_time))
        return onet_boxes

    def __pnet_detect(self, image):
        """P-Net stage: scan an image pyramid for candidate windows.

        :param image: PIL.Image (resized per pyramid level)
        :return: (N, 5) ndarray of candidates after per-level soft-NMS
        """
        img_w, img_h = image.size
        min_side_len = min(img_w, img_h)
        scale = 1
        bboxes = []
        # Keep shrinking until the short side no longer fits P-Net's
        # 12x12 input window.
        while min_side_len > 12:
            img_data = self.__img_transforms(image)
            img_data = img_data.unsqueeze(0)  # add batch dimension

            # no_grad: inference only — also required so the outputs can
            # be converted to numpy without autograd errors.
            with torch.no_grad():
                _classify, _offset = self.pnet(img_data)
            classify, offset = _classify[0][0], _offset[0]
            # All feature-map cells with confidence > 0.6, as (row, col).
            indexes = torch.nonzero(torch.gt(classify, 0.6))
            if indexes.shape[0] > 0:
                # Guard: the original decoded and soft-NMS'd even when no
                # cell passed the threshold, crashing on the empty batch.
                boxes = self.__box(indexes, offset,
                                   classify[indexes[:, 0], indexes[:, 1]],
                                   scale)
                boxes = softnms(np.array(boxes),
                                thresh_iou=0.3,
                                thresh_conf=0.1,
                                isMin=False)
                bboxes.extend(boxes)
            scale *= 0.707
            _weight = int(img_w * scale)
            _height = int(img_h * scale)
            image = image.resize((_weight, _height))
            min_side_len = np.minimum(_weight, _height)
        return np.array(bboxes)  # (N, 5)

    def __box(self, start_index, offset, cls, scale, stride=2, side_len=12):
        """Map P-Net feature-map cells back to image-space boxes (batched).

        :param start_index: (K, 2) tensor of (row, col) cell indices
        :param offset: regression offsets for the whole feature map
        :param cls: (K,) confidences for the selected cells
        :param scale: pyramid scale the feature map was computed at
        :param stride: P-Net's effective output stride
        :param side_len: P-Net's input window size
        :return: (K, 5) tensor of ``[x1, y1, x2, y2, cls]`` rows
        """
        # Receptive windows of the cells in original-image coordinates.
        _x1 = ((start_index[:, 1]).to(torch.float32) * stride) / scale
        _y1 = ((start_index[:, 0]).to(torch.float32) * stride) / scale
        _x2 = (
            (start_index[:, 1]).to(torch.float32) * stride + side_len) / scale
        _y2 = (
            (start_index[:, 0]).to(torch.float32) * stride + side_len) / scale

        ow = _x2 - _x1
        oh = _y2 - _y1

        # Regression offsets are relative to the window size.
        _offset = offset[:, start_index[:, 0], start_index[:, 1]]
        x1 = _x1 + ow * _offset[0, :]
        y1 = _y1 + oh * _offset[1, :]
        x2 = _x2 + ow * _offset[2, :]
        y2 = _y2 + oh * _offset[3, :]
        bboxes = torch.stack([x1, y1, x2, y2, cls])
        bboxes = torch.transpose(bboxes, 1, 0)
        return bboxes

    def __rnet_detect(self, image, pnet_boxes):
        """R-Net stage: re-score and refine P-Net candidate boxes.

        :param image: original PIL image
        :param pnet_boxes: (N, 5) candidate boxes from the P-Net stage
        :return: refined boxes after soft-NMS, or an empty array
        """
        img_dataset = []
        pboxes = convert_to_square(pnet_boxes)  # shape (N, 5)
        crop_x1, crop_y1, crop_x2, crop_y2 = pboxes[:, 0].tolist(
        ), pboxes[:, 1].tolist(), pboxes[:, 2].tolist(), pboxes[:, 3].tolist()
        for i in range(len(pboxes)):
            img = image.crop(
                (int(crop_x1[i]), int(crop_y1[i]), int(crop_x2[i]),
                 int(crop_y2[i])))
            img = img.resize((24, 24))
            imgdata = self.__img_transforms(img)
            img_dataset.append(imgdata)
        img_dataset = torch.stack(img_dataset)
        # no_grad: without it, .numpy() below raises on tensors that
        # require grad.
        with torch.no_grad():
            classify, offset = self.rnet(img_dataset)
        classify, offset = classify.numpy(), offset.numpy()
        # Keep candidates with confidence > 0.7.
        indexes, _ = np.where(classify > 0.7)
        if indexes.shape[0] == 0:
            # Guard: nothing passed the threshold.
            return np.array([])
        # Apply the regression offsets to the squared boxes, vectorised.
        _x1, _y1, _x2, _y2 = pboxes[indexes][:, 0], pboxes[
            indexes][:, 1], pboxes[indexes][:, 2], pboxes[indexes][:, 3]
        ow = _x2 - _x1
        oh = _y2 - _y1
        boxes = np.zeros((indexes.shape[0], pboxes.shape[1]))
        boxes[:, 0] = _x1 + ow * offset[indexes][:, 0]
        boxes[:, 1] = _y1 + oh * offset[indexes][:, 1]
        boxes[:, 2] = _x2 + ow * offset[indexes][:, 2]
        boxes[:, 3] = _y2 + oh * offset[indexes][:, 3]
        boxes[:, 4] = classify[indexes][:, 0]
        return softnms(np.array(boxes),
                       thresh_iou=0.3,
                       thresh_conf=0.05,
                       isMin=False)

    def __onet_detect(self, image, rnet_boxes):
        """O-Net stage: produce the final calibrated boxes.

        :param image: original PIL image
        :param rnet_boxes: (N, 5) candidate boxes from the R-Net stage
        :return: final boxes after min-mode soft-NMS, or an empty array
        """
        img_dataset = []
        rboxes = convert_to_square(rnet_boxes)
        # Fix: crop from the squared boxes (rboxes) — the original
        # cropped from the un-squared rnet_boxes while applying the
        # regression below to rboxes, mismatching __rnet_detect.
        crop_x1, crop_y1, crop_x2, crop_y2 = rboxes[:, 0].tolist(
        ), rboxes[:, 1].tolist(), rboxes[:, 2].tolist(
        ), rboxes[:, 3].tolist()
        for i in range(len(rboxes)):
            img = image.crop(
                (int(crop_x1[i]), int(crop_y1[i]), int(crop_x2[i]),
                 int(crop_y2[i])))
            img = img.resize((48, 48))
            img_data = self.__img_transforms(img)
            img_dataset.append(img_data)
        img_dataset = torch.stack(img_dataset)
        with torch.no_grad():
            classify, offset = self.onet(img_dataset)
        classify, offset = classify.numpy(), offset.numpy()
        # Stricter confidence threshold for the final stage.
        indexes, _ = np.where(classify > 0.9)
        if indexes.shape[0] == 0:
            # Guard: nothing passed the threshold.
            return np.array([])
        # Apply the regression offsets to the squared boxes, vectorised.
        _x1, _y1, _x2, _y2 = rboxes[indexes][:, 0], rboxes[
            indexes][:, 1], rboxes[indexes][:, 2], rboxes[indexes][:, 3]
        ow = _x2 - _x1
        oh = _y2 - _y1
        boxes = np.zeros((indexes.shape[0], rboxes.shape[1]))
        boxes[:, 0] = _x1 + ow * offset[indexes][:, 0]
        boxes[:, 1] = _y1 + oh * offset[indexes][:, 1]
        boxes[:, 2] = _x2 + ow * offset[indexes][:, 2]
        boxes[:, 3] = _y2 + oh * offset[indexes][:, 3]
        boxes[:, 4] = classify[indexes][:, 0]
        return softnms(np.array(boxes),
                       thresh_iou=0.3,
                       thresh_conf=0.8,
                       isMin=True)