Example #1
    def o(self):
        """transform out of tensor to numpy
            filter with confidence
            calculate coordinates
            filter with NMS
            draw"""
        start_time = datetime.datetime.now()
        data, prior = self.r()
        with torch.no_grad():
            confi, offset = self.onet(data.cuda())
        confi = confi.cpu().numpy().flatten()
        offset = offset.cpu().numpy()

        offset, prior, confi = offset[confi >= 0.999], prior[confi >= 0.999], confi[confi >= 0.999]

        offset, landmarks = offset[:, :4], offset[:, 4:]
        offset, landmarks = utils.transform(offset, landmarks, prior)

        boxes = np.hstack((offset, np.expand_dims(confi, axis=1),
                           landmarks))  # combine offsets, confidence, and landmarks for NMS
        boxes = utils.NMS(boxes, threshold=0.4, ismin=True)
        end_time = datetime.datetime.now()
        print("ONet cost {}ms".format(
            (end_time - start_time).microseconds / 1000))
        return boxes
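
All of these examples delegate non-maximum suppression to utils.NMS, whose source is not shown. As a rough sketch of what such a helper typically does, assuming boxes laid out as [x1, y1, x2, y2, confidence, ...] and ismin=True meaning overlap is measured against the smaller box's area rather than the union (both assumptions; the real utils module may differ):

import numpy as np

def nms_sketch(boxes, threshold=0.3, ismin=False):
    """Hypothetical stand-in for utils.NMS: greedy non-maximum suppression.

    boxes: (N, 5+) array of [x1, y1, x2, y2, confidence, ...].
    ismin: if True, overlap = inter / min(area) instead of inter / union.
    """
    if boxes.shape[0] == 0:
        return np.array([])
    boxes = boxes[(-boxes[:, 4]).argsort()]  # sort by confidence, highest first
    keep = []
    while boxes.shape[0] > 0:
        best, rest = boxes[0], boxes[1:]
        keep.append(best)
        if rest.shape[0] == 0:
            break
        # intersection of the best box with all remaining boxes
        xx1 = np.maximum(best[0], rest[:, 0])
        yy1 = np.maximum(best[1], rest[:, 1])
        xx2 = np.minimum(best[2], rest[:, 2])
        yy2 = np.minimum(best[3], rest[:, 3])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        area_best = (best[2] - best[0]) * (best[3] - best[1])
        area_rest = (rest[:, 2] - rest[:, 0]) * (rest[:, 3] - rest[:, 1])
        if ismin:
            overlap = inter / np.minimum(area_best, area_rest)
        else:
            overlap = inter / (area_best + area_rest - inter)
        boxes = rest[overlap < threshold]
    return np.stack(keep)
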
Example #2
    def r(self):
        """transform out of tensor to numpy
            filter with confidence
            calculate coordinates
            filter with NMS
            crop image from original image for ONet's input
            draw"""
        start_time = time.time()
        data, prior = self.p()
        with torch.no_grad():
            confi, offset = self.rnet(data.cuda())
        confi = confi.cpu().numpy().flatten()
        offset = offset.cpu().numpy()

        offset, prior, confi = offset[confi >= 0.99], prior[confi >= 0.99], confi[confi >= 0.99]

        offset, landmarks = offset[:, :4], offset[:, 4:]
        offset, landmarks = utils.transform(offset, landmarks, prior)

        boxes = np.hstack((offset, np.expand_dims(confi, axis=1), landmarks))
        boxes = utils.NMS(boxes, threshold=0.6, ismin=False)

        o_data, o_prior = utils.crop_to_square(boxes[:, :5], 48, self.image)

        o_prior = np.stack(o_prior, axis=0)
        o_data = torch.stack(o_data, dim=0)
        end_time = time.time()
        print("RNet create {} candidate items\ncost {}s!".format(o_data.size(0), end_time - start_time))
        utils.draw(boxes, self.test_img, "RNet")
        return o_data, o_prior
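
utils.crop_to_square is likewise external to these snippets. Judging by how its return values are used above (a list of CHW tensors that gets stacked for the next network, plus the matching square prior boxes), a plausible sketch looks like this; the real helper's padding and clipping policy may differ:

import numpy as np
import torchvision.transforms as tf

def crop_to_square_sketch(boxes, size, image):
    """Hypothetical stand-in for utils.crop_to_square: expand each box to a
    square, crop it from the PIL image, and resize to size x size."""
    data, priors = [], []
    for x1, y1, x2, y2, _ in boxes:
        half = max(x2 - x1, y2 - y1) / 2
        cx, cy = (x1 + x2) / 2, (y1 + y2) / 2  # keep the box center fixed
        sq = (int(cx - half), int(cy - half), int(cx + half), int(cy + half))
        crop = image.crop(sq).resize((size, size))
        data.append(tf.ToTensor()(crop) - 0.5)  # same normalization as the PNet input
        priors.append(np.array(sq, dtype=np.float64))
    return data, priors
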
Example #3
    def p(self):
        """transform out of tensor to numpy
            filter with confidence
            calculate coordinates
            filter with NMS
            crop image from original image for RNet's input
            draw"""
        r_prior, r_data = [], []  # collect RNet's prior, RNet's input
        coordinates = []  # collect coordinates for draw
        count = 0
        start_time = time.time()
        while min(self.img.size) > 12:
            scal = 0.707**count  # scale factor to map back to the original image; 0.707 halves the area each step
            input = tf.ToTensor()(self.img).unsqueeze(dim=0) - 0.5
            with torch.no_grad():
                confi, offset = self.pnet(input.cuda())
            W = offset.size(3)  # width of the feature map
            confi = confi.permute(0, 2, 3, 1)
            confi = confi.reshape(-1).cpu().numpy()
            offset = offset.permute(0, 2, 3, 1)  # move channels last so each cell's values sit together
            offset = offset.reshape((-1, 14)).cpu().numpy()

            o_index = np.arange(len(offset)).reshape(-1, 1)  # flat index over the W_out*H_out feature map
            offset, o_index, confi = offset[confi >= 0.9], o_index[confi >= 0.9], confi[confi >= 0.9]

            y_index, x_index = divmod(o_index, W)  # feature-map coords: (x, y) = (remainder, quotient)
            x1, y1, x2, y2 = (x_index * 2 / scal, y_index * 2 / scal,
                              (x_index * 2 + 12) / scal,
                              (y_index * 2 + 12) / scal)  # top-left = index * stride; bottom-right = top-left + window side
            p_prior = np.hstack((x1, y1, x2, y2))  # assemble original-image coordinates into one 2-D array
            offset, landmarks = offset[:, :4], offset[:, 4:]
            offset, landmarks = utils.transform(offset, landmarks, p_prior)

            boxes = np.hstack((offset, np.expand_dims(confi, axis=1),
                               landmarks))  # combine offsets and confidence for NMS
            boxes = utils.NMS(boxes, threshold=0.7, ismin=False)
            coordinates.extend(boxes.tolist())
            if boxes.shape[0] == 0:
                break

            data, prior = utils.crop_to_square(boxes[:, :5], 24, self.image)
            r_prior.extend(prior)
            r_data.extend(data)
            self.img = self.pyramid()  # next level of the image pyramid
            count += 1

        r_prior = np.stack(r_prior, axis=0)  # repack as numpy array and tensor
        r_data = torch.stack(r_data, dim=0)
        end_time = time.time()
        print("PNet create {} candidate items\ncost {}s!".format(
            r_data.size(0), end_time - start_time))
        utils.draw(np.stack(coordinates, axis=0), self.test_img, "PNet")
        return r_data, r_prior
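
The prior-box arithmetic in p() follows from PNet's geometry: an output stride of 2, a 12x12 receptive field, and a per-level scale of 0.707**count. A worked example of mapping one feature-map cell back to the original image (the cell indices are illustrative):

# Worked example of the prior-box arithmetic in p(), using PNet's
# stride of 2 and 12x12 window as in the snippets above.
count = 2
scal = 0.707 ** count           # each level halves the image area
x_index, y_index = 10, 5        # cell in the PNet output map
x1 = x_index * 2 / scal         # top-left = index * stride, undone by the scale
y1 = y_index * 2 / scal
x2 = (x_index * 2 + 12) / scal  # bottom-right = top-left + window side
y2 = (y_index * 2 + 12) / scal
print(x1, y1, x2, y2)           # ~ (40.01, 20.01, 64.02, 44.01) on the original image
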
Example #4
    def p(self):
        """transform out of tensor to numpy
            filter with confidence
            calculate coordinates
            filter with NMS
            crop image from original image for RNet's input
            draw"""
        start_time = datetime.datetime.now()
        r_prior, r_data = [], []  # collect RNet's prior, RNet's input
        coordinates = []  # collect coordinates for draw
        count = 0
        while min(self.img.size) > 12:
            scal = 0.707**count  # 0.707 halves the image area each step
            input = tf.ToTensor()(self.img).unsqueeze(dim=0) - 0.5
            with torch.no_grad():
                confi, offset = self.pnet(input.cuda())
            confi, offset = confi.transpose(1, -1), offset.transpose(1, -1)

            mask = confi[..., 0] > 0.9
            confi = confi[mask].cpu().numpy()  # filter confi
            offset = offset[mask].cpu().numpy()  # filter offset

            index = mask.nonzero().cpu().numpy()  # indices of confident cells: columns (batch, x, y)
            x_index, y_index = index[:, 1:2], index[:, 2:3]
            x1, y1, x2, y2 = (x_index * 2 / scal, y_index * 2 / scal,
                              (x_index * 2 + 12) / scal,
                              (y_index * 2 + 12) / scal)  # top-left = index * stride; bottom-right = top-left + 12
            p_prior = np.hstack((x1, y1, x2, y2))  # 2-D array of original-image prior coordinates

            offset, landmarks = offset[:, :4], offset[:, 4:]
            offset, landmarks = utils.transform(offset, landmarks, p_prior)

            boxes = np.hstack((offset, confi,
                               landmarks))  # rows of [offset | confi | landmarks] for NMS
            boxes = utils.NMS(boxes, threshold=0.7, ismin=False)
            coordinates.extend(boxes.tolist())
            if boxes.shape[0] == 0:
                break

            data, prior = utils.crop_to_square(boxes[:, :5], 24, self.image)
            r_prior.extend(prior)
            r_data.extend(data)
            self.img = self.pyramid()
            count += 1

        r_prior = np.stack(r_prior, axis=0)
        r_data = torch.stack(r_data, dim=0)
        end_time = datetime.datetime.now()
        print("PNet cost {}ms".format(
            (end_time - start_time).microseconds / 1000))
        return r_data, r_prior
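
utils.transform is the remaining shared helper in these snippets. In MTCNN-style pipelines the offsets and landmarks are usually regressed as fractions of the prior box's side lengths, so a plausible (unverified) sketch of the decode step is:

import numpy as np

def transform_sketch(offset, landmarks, prior):
    """Hypothetical stand-in for utils.transform: decode offsets and landmarks
    (fractions of the prior's width/height) into absolute coordinates."""
    prior = np.asarray(prior, dtype=np.float64)
    w = (prior[:, 2] - prior[:, 0])[:, None]
    h = (prior[:, 3] - prior[:, 1])[:, None]
    boxes = np.empty_like(offset)
    boxes[:, 0::2] = prior[:, 0::2] + offset[:, 0::2] * w  # x1, x2
    boxes[:, 1::2] = prior[:, 1::2] + offset[:, 1::2] * h  # y1, y2
    points = np.empty_like(landmarks)
    points[:, 0::2] = prior[:, 0:1] + landmarks[:, 0::2] * w  # five landmark xs
    points[:, 1::2] = prior[:, 1:2] + landmarks[:, 1::2] * h  # five landmark ys
    return boxes, points
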
Example #5
    def rnet_detect(self, image, pnet_boxes):
        img_dataset = []
        pnet_boxes = utils.convertToRectangle(pnet_boxes)
        for pnet_box in pnet_boxes:
            x1 = int(pnet_box[0])
            y1 = int(pnet_box[1])
            x2 = int(pnet_box[2])
            y2 = int(pnet_box[3])
            img = image.crop((x1, y1, x2, y2))
            img = img.resize((24, 24))
            img_data = self.trans(img)
            img_dataset.append(img_data)
        img_dataset = torch.stack(img_dataset).to(self.device)

        with torch.no_grad():
            confidence, offset, _ = self.rnet(img_dataset)

        confidence = confidence.cpu().detach().numpy()
        offset = offset.cpu().detach().numpy()

        indexs, _ = np.where(confidence > 0.93)
        if indexs.shape[0] == 0:
            return np.array([])
        else:
            boxes = pnet_boxes[indexs]
            # decode directly against the real boxes passed in from PNet
            x1_array = boxes[:, 0]
            y1_array = boxes[:, 1]
            x2_array = boxes[:, 2]
            y2_array = boxes[:, 3]

            w_array = x2_array - x1_array
            h_array = y2_array - y1_array

            offset = offset[indexs]
            confidence = confidence[indexs]

            x1_real = x1_array + w_array * offset[:, 0]
            y1_real = y1_array + h_array * offset[:, 1]
            x2_real = x2_array + w_array * offset[:, 2]
            y2_real = y2_array + h_array * offset[:, 3]
            box = np.stack(
                [x1_real, y1_real, x2_real, y2_real, confidence[:, 0]], axis=1)
        return utils.NMS(box, 0.3)
Example #6
    def pnet_detect(self, image):
        # holds all the real boxes that survive NMS
        boxes_nms_all = []
        w, h = image.size
        # detect faces of various sizes in the image
        # min_length = np.minimum(w, h)
        # scale = scale_new = 1
        # for still images where the faces are fairly large
        scale = 0.7
        # for video where the faces are fairly large
        # scale = 0.7**10
        w_ = int(w * scale)
        h_ = int(h * scale)
        min_length = np.minimum(w_, h_)
        scale_new = min_length / np.minimum(w, h)
        image = image.resize((w_, h_))

        while min_length > 12:
            img_data = self.trans(image).to(self.device)
            # add the batch dimension
            img_data.unsqueeze_(0)
            with torch.no_grad():
                confidence, offset, _ = self.pnet(img_data)
            confidence = confidence[0][0].cpu().detach()
            offset = offset[0].cpu().detach()
            # drop low-confidence candidates by threshold; returns the qualifying indices
            indexs = torch.nonzero(torch.gt(confidence, 0.8))
            if indexs.shape[0] == 0:
                nms = np.array([])
            else:
                boxes = self.backToImage(np.array(indexs, dtype=float),
                                         offset, scale_new, confidence)
                nms = utils.NMS(boxes, 0.3)
            boxes_nms_all.extend(nms)
            scale *= 0.7
            w_ = int(w * scale)
            h_ = int(h * scale)
            min_length = np.minimum(w_, h_)
            scale_new = min_length / np.minimum(w, h)
            image = image.resize((w_, h_))
        if len(boxes_nms_all) == 0:
            return np.array([])
        boxes_nms_all = np.stack(boxes_nms_all)
        return boxes_nms_all
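
The loop in pnet_detect shrinks the image by a factor of 0.7 per level (starting from an initial 0.7) until the short side reaches PNet's 12-pixel window, so the number of pyramid levels grows only logarithmically with image size:

def pyramid_levels(w, h, factor=0.7, first_scale=0.7, min_side=12):
    # Count how many times pnet_detect's while-loop runs: the short side
    # starts at min(w, h) * first_scale and shrinks by `factor` until it
    # is no longer greater than min_side.
    side = min(w, h) * first_scale
    levels = 0
    while side > min_side:
        levels += 1
        side *= factor
    return levels

print(pyramid_levels(1280, 720))  # 11 levels for a 720p frame
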
Example #7
    def o(self):
        """transform out of tensor to numpy
            filter with confidence
            calculate coordinates
            filter with NMS
            draw"""
        data, prior = self.r()
        confi, offset = self.onet(data.cuda())
        confi = confi.data.cpu().numpy().flatten()
        offset = offset.data.cpu().numpy()
        offset, prior, confi = offset[confi >= 0.999], prior[confi >= 0.999], confi[confi >= 0.999]
        offset, landmarks = offset[:, :4], offset[:, 4:]
        offset, landmarks = utils.transform(offset, landmarks, prior)

        boxes = np.hstack((offset, np.expand_dims(confi, axis=1), landmarks))  # combine offsets, confidence, and landmarks for NMS
        boxes = utils.NMS(boxes, threshold=0.4, ismin=True)

        print("ONet create {} candidate items".format(boxes.shape[0]))
        utils.draw(boxes, self.test_img, "ONet")
Example #8
def pnet_prediction(img, PNet, thresholds):
    
    temp = img.copy() / 255. 
    orig_h, orig_w, orig_c = temp.shape 
    # build the list of scale factors for the image pyramid
    scales = utils.calculate_scales(temp)
    PNet_outputs = []

    t0 = time.time()
    # build the image pyramid and predict each level in turn
    for scale in scales:
        scale_h = int(orig_h * scale)
        scale_w = int(orig_w * scale)
        scaled_img = cv2.resize(temp, (scale_w, scale_h)) # OpenCV takes width first
        input_img = scaled_img.reshape(1, *scaled_img.shape) # reshape to (1, scale_h, scale_w, orig_c)
        pred = PNet.predict(input_img) # pred is a list of 2 arrays with the shapes (1, ?, ?, 2) & (1, ?, ?, 4)
        PNet_outputs.append(pred) 
    img_num = len(scales)
    
    rectangles_list = []
    for i in range(img_num):
        prob = PNet_outputs[i][0][0][:, :, 0] # "face" confidence: the (?, ?) slice of the (1, ?, ?, 2) output
        roi = PNet_outputs[i][1][0] # box-offset ratios: the (?, ?, 4) slice of the (1, ?, ?, 4) output
        
        out_h, out_w = prob.shape # each cell is the confidence that a 12 x 12 window contains a face
        out_side = max(out_h, out_w) # ???
        
        prob = np.swapaxes(prob, 0, 1) 
        roi = np.swapaxes(roi, 0, 2) # shape becomes (4, ?, ?)
        rectangles = utils.pnet_detect_face(prob, roi, out_side, 1 / scales[i], orig_w, orig_h, thresholds[0])
        rectangles_list.extend(rectangles) # rectangles: (num, 5) rows of (x1, y1, x2, y2, score)
    
    rectangles_list = utils.NMS(rectangles_list, 0.7, 'iou') 
    
    t1 = time.time()
    print("Inference time for P-Net is " + str(t1 - t0))

    return rectangles_list
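
utils.calculate_scales builds the pyramid's scale list. A common recipe, and only a guess at what this particular utils module does, is to decay a scale factor until the short side would drop below PNet's 12-pixel window:

def calculate_scales_sketch(img, factor=0.709, min_side=12):
    """Hypothetical stand-in for utils.calculate_scales: one scale per
    pyramid level, decaying until the short side reaches min_side."""
    h, w = img.shape[:2]
    scales = []
    scale = 1.0
    while min(h, w) * scale > min_side:
        scales.append(scale)
        scale *= factor
    return scales
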
Example #9
                                            batch_size=1,
                                            shuffle=True,
                                            num_workers=2)
    result = []
    for data in iter(test_data):
        data = data.float()
        output = model(data)
        _, predicted = torch.max(output, 1)  # torch.max returns (values, indices)
        result.append(predicted.item())  # batch_size is 1, so one label per batch
    result = np.asarray(result).reshape(-1, 1)  # column vector for concatenation below
    input = np.asarray(input).reshape(-1, 1)
    candidate = np.asarray(candidate).reshape(-1, 4)
    nms_sum = np.concatenate((candidate, input, result), axis=1)

    # NMS
    regions = utils.NMS(nms_sum)

    # draw rectangles on the original image
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
    ax.imshow(image)
    for x, y, w, h, label in regions:
        print(x, y, w, h)
        rect = mpatches.Rectangle((x, y),
                                  w,
                                  h,
                                  fill=False,
                                  edgecolor='red',
                                  linewidth=1)
        ax.add_patch(rect)
        plt.annotate(label, xy=(x, y))
Example #10
    def detectFace(self, img, threshold):
        #-----------------------------#
        #        normalize
        #-----------------------------#
        copy_img = (img.copy() - 127.5) / 127.5
        origin_h, origin_w, _ = copy_img.shape
        print("origin image's shape is: ", origin_h, origin_w)
        #-----------------------------#
        #        compute the scale ratio
        #        for each pyramid level
        #-----------------------------#
        scales = utils.calculateScales(img)

        out = []

        #-----------------------------#
        #        rough face boxes
        #        pnet stage
        #-----------------------------#
        for scale in scales:
            hs = int(origin_h * scale)
            ws = int(origin_w * scale)
            scale_img = cv2.resize(copy_img, (ws, hs))
            inputs = np.expand_dims(scale_img, 0).astype(np.float32)
            # print('inputs shape is: ', inputs.shape)
            output = self.Pnet.run([self.Pnet_outputs[0], self.Pnet_outputs[1]],
                                    {self.Pnet_inputs[0]: inputs})
            # print(output[0].shape)
            # print(output[1].shape)
            output = [output[0][0], output[1][0]]
            out.append(output)

        # print(out)

        rectangles = []
        #----------------------------------------------------------#
        #        loop over the image-pyramid predictions and
        #        pull out each level's class and regression outputs
        #----------------------------------------------------------#
        for i in range(len(scales)):
            #------------------------------------------------------------------#
            #   laid out slightly differently than in the video, because the
            #   batch_size dimension was already dropped in the pyramid loop
            #------------------------------------------------------------------#
            cls_prob = out[i][0][:, :, 1]
            roi = out[i][1]
            #--------------------------------------------#
            #   height and width of each scaled image
            #--------------------------------------------#
            out_h, out_w = cls_prob.shape
            out_side = max(out_h, out_w)
            #--------------------------------------------#
            #   decoding step
            #--------------------------------------------#
            rectangle = utils.detect_face_12net(cls_prob, roi, out_side, 1 / scales[i], origin_w, origin_h, threshold[0])
            rectangles.extend(rectangle)

        #-----------------------------------------#
        #    non-maximum suppression
        #-----------------------------------------#
        rectangles = np.array(utils.NMS(rectangles, 0.7))

        if len(rectangles) == 0:
            return rectangles

        #-----------------------------------------#
        #    refine the face boxes
        #    Rnet stage
        #-----------------------------------------#
        predict_24_batch = []
        for rectangle in rectangles:
            #--------------------------------------------#
            #    crop from the original image using the rough coordinates
            #--------------------------------------------#
            crop_img = copy_img[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
            #--------------------------------------------#
            #    resize the crop to 24x24
            #--------------------------------------------#
            scale_img = cv2.resize(crop_img, (24, 24))
            predict_24_batch.append(scale_img)

        cls_prob, roi_prob = self.Rnet.run([self.Rnet_outputs[0], self.Rnet_outputs[1]],
                                           {self.Rnet_inputs[0]: np.array(predict_24_batch).astype(np.float32)})
        # print("cls_prob: ", cls_prob.shape)
        # print("roi_prob: ", roi_prob.shape)
        #------------------------------------------#
        #    decoding step
        #------------------------------------------#
        rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles, origin_w, origin_h, threshold[1])
        # print(rectangles)

        if len(rectangles) == 0:
            return rectangles

        #-----------------------------#
        #    final face boxes
        #    onet stage
        #-----------------------------#
        predict_batch = []
        for rectangle in rectangles:
            #------------------------------------------#
            #   crop from the original image using the rough coordinates
            #------------------------------------------#
            crop_img = copy_img[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
            #-----------------------------------------------#
            #   resize the crop to 48x48
            #-----------------------------------------------#
            scale_img = cv2.resize(crop_img, (48, 48))
            predict_batch.append(scale_img)
        # print(predict_batch)

        cls_prob, roi_prob, pts_prob = self.Onet.run([self.Onet_outputs[0], self.Onet_outputs[1], self.Onet_outputs[2]],
                                                     {self.Onet_inputs[0]: np.array(predict_batch).astype(np.float32)})

        #-----------------------------#
        #    decoding step
        #-----------------------------#
        rectangles = utils.filter_face_48net(cls_prob, roi_prob, pts_prob, rectangles, origin_w, origin_h, threshold[2])

        return rectangles
Example #11
def evaluate(model, path, iou_thres, conf_thres, nms_thres, image_size,
             batch_size, num_workers, device):
    # set the model to evaluation mode
    model.eval()

    # set up the dataset and dataloader
    dataset = datasets.ListDataset(path,
                                   image_size,
                                   augment=False,
                                   multiscale=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             collate_fn=dataset.collate_fn)

    labels = []
    sample_metrics = []  # List[Tuple] -> [(TP, confs, pred)]
    entire_time = 0
    for _, images, targets in tqdm.tqdm(dataloader,
                                        desc='Evaluate method',
                                        leave=False):
        if targets is None:
            continue

        # Extract labels
        labels.extend(targets[:, 1].tolist())

        # Rescale targets
        targets[:, 2:] = utils.xywh2xyxy(targets[:, 2:])
        targets[:, 2:] *= image_size

        # Predict objects
        start_time = time.time()
        with torch.no_grad():
            images = images.to(device)
            outputs = model(images)
            outputs = utils.NMS(outputs, conf_thres, nms_thres)
        entire_time += time.time() - start_time

        # Compute true positives, predicted scores and predicted labels per batch
        sample_metrics.extend(
            utils.get_batch_statistics(outputs, targets, iou_thres))

    # Concatenate sample statistics
    if len(sample_metrics) == 0:
        true_positives, pred_scores, pred_labels = np.array([]), np.array(
            []), np.array([])
    else:
        true_positives, pred_scores, pred_labels = [
            np.concatenate(x, 0) for x in list(zip(*sample_metrics))
        ]

    # Compute AP
    precision, recall, AP, f1, ap_class = utils.ap_per_class(
        true_positives, pred_scores, pred_labels, labels)

    # Compute inference time and fps
    inference_time = entire_time / len(dataset)
    fps = 1 / inference_time

    # Convert inference time to milliseconds
    inference_time *= 1000

    return precision, recall, AP, f1, ap_class, inference_time, fps
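
Example #11 is a YOLO-style evaluation loop rather than an MTCNN cascade; its utils.xywh2xyxy converts center-format targets to corner format before rescaling. A minimal sketch of that conversion (the helper itself is not shown in the snippet):

import torch

def xywh2xyxy_sketch(x):
    # Convert (cx, cy, w, h) boxes to (x1, y1, x2, y2) corner format.
    y = x.clone()
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # x1 = cx - w/2
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # y1 = cy - h/2
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # x2 = cx + w/2
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # y2 = cy + h/2
    return y
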
Example #12
    def detectFace(self, img, threshold):
        """Detect the face and get the face detection box"""
        copy_img = (img.copy() - 127.5) / 127.5  # normalize
        origin_h, origin_w, _ = copy_img.shape  # original image size
        scales = utils.calculateScales(img)  # scale ratio for each pyramid level

        #-------------------------------------------------#
        # pnet stage: rough face boxes
        # first store the coarse predictions in `out`,
        # then decode them into rough boxes in `rectangles`
        #-------------------------------------------------#
        out = []
        rectangles = []
        for scale in scales:
            hs = int(origin_h * scale)  # scaled height
            ws = int(origin_w * scale)  # scaled width
            scale_img = cv2.resize(copy_img, (ws, hs))
            inputs = np.expand_dims(scale_img, 0)
            output = self.Pnet.predict(inputs)
            output = [output[0][0], output[1][0]]  # drop the batch dimension, one 2-D map per image
            out.append(output)
        for i in range(len(scales)):
            cls_prob = out[i][0][:, :, 1]
            out_h, out_w = cls_prob.shape
            out_side = max(out_h, out_w)
            roi = out[i][1]
            rectangle = utils.detect_face_12net(cls_prob, roi, out_side,
                                                1 / scales[i], origin_w,
                                                origin_h, threshold[0])  # decode
            rectangles.extend(rectangle)

        rectangles = np.array(utils.NMS(rectangles, 0.7))  # non-maximum suppression

        if len(rectangles) == 0:
            return []

        #--------------------------------------#
        # Rnet stage: refine the face boxes,
        # finally converting them to squares
        #--------------------------------------#
        predict_24_batch = []
        for rectangle in rectangles:
            crop_img = copy_img[int(rectangle[1]):int(rectangle[3]),
                                int(rectangle[0]):int(rectangle[2])]  # crop from the original image using the rough coordinates
            scale_img = cv2.resize(crop_img, (24, 24))
            predict_24_batch.append(scale_img)

        cls_prob, roi_prob = self.Rnet.predict(np.array(predict_24_batch))

        rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                             origin_w, origin_h,
                                             threshold[1])  # decode

        if len(rectangles) == 0:
            return rectangles

        #-----------------------------#
        # Onet stage: final face boxes,
        # plus five facial landmarks (eyes, mouth corners, nose tip)
        #-----------------------------#
        predict_batch = []
        for rectangle in rectangles:
            crop_img = copy_img[int(rectangle[1]):int(rectangle[3]),
                                int(rectangle[0]):int(rectangle[2])]  # crop from the original image using the rough coordinates
            scale_img = cv2.resize(crop_img, (48, 48))
            predict_batch.append(scale_img)

        cls_prob, roi_prob, pts_prob = self.Onet.predict(
            np.array(predict_batch))

        rectangles = utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                             rectangles, origin_w, origin_h,
                                             threshold[2])  # decode

        return rectangles
Example #13
def detectFace(img, threshold):
    #-----------------------------#
    #  Normalize
    #-----------------------------#
    copy_img = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, _ = copy_img.shape
    #-----------------------------#
    #   Compute the scale ratio for
    #   each pyramid level of the input
    #-----------------------------#
    scales = utils.calculateScales(img)
    out = []
    #-----------------------------#
    #   Roughly compute the face boxes
    #   pnet stage
    #-----------------------------#
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(copy_img, (ws, hs))
        inputs = scale_img.reshape(1, *scale_img.shape)
        #output = self.Pnet.predict(inputs)
        output = Pnet.predict(inputs)
        out.append(output)

    image_num = len(scales)
    rectangles = []
    for i in range(image_num):
        # Probability of face
        cls_prob = out[i][0][0][:, :, 1]
        #print(cls_prob.shape)

        # The position of its corresponding box
        roi = out[i][1][0]
        #print(roi.shape)

        # Take out the length and width of each zoomed picture
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        #print(cls_prob.shape)

        # Decoding process
        rectangle = utils.detect_face_12net(cls_prob, roi, out_side,
                                            1 / scales[i], origin_w, origin_h,
                                            0.7)
        rectangles.extend(rectangle)

    # Non-maximum suppression
    rectangles = utils.NMS(rectangles, 0.7)

    if len(rectangles) == 0:
        return rectangles

    #-----------------------------#
    #   Compute the final face boxes
    #   onet stage
    #-----------------------------#
    predict_batch = []
    for rectangle in rectangles:
        crop_img = copy_img[int(rectangle[1]):int(rectangle[3]),
                            int(rectangle[0]):int(rectangle[2])]
        scale_img = cv2.resize(crop_img, (48, 48))
        predict_batch.append(scale_img)

    predict_batch = np.array(predict_batch)
    #output = self.Onet.predict(predict_batch)
    output = Onet.predict(predict_batch)
    cls_prob = output[0]
    roi_prob = output[1]
    pts_prob = output[2]

    rectangles = utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                         rectangles, origin_w, origin_h, 0.7)

    return rectangles
Example #14
    def onet_detect(self, image, rnet_boxes):
        img_dataset = []
        rnet_boxes = utils.convertToRectangle(rnet_boxes)
        for rnet_box in rnet_boxes:
            x1 = int(rnet_box[0])
            y1 = int(rnet_box[1])
            x2 = int(rnet_box[2])
            y2 = int(rnet_box[3])

            img = image.crop((x1, y1, x2, y2))
            img = img.resize((48, 48))
            img_data = self.trans(img)
            img_dataset.append(img_data)

        img_dataset = torch.stack(img_dataset).to(self.device)

        with torch.no_grad():
            confidence, offset, landmarks = self.onet(img_dataset)
        confidence = confidence.cpu().detach().numpy()
        offset = offset.cpu().detach().numpy()
        landmarks = landmarks.cpu().detach().numpy()

        indexs, _ = np.where(confidence > 0.99)
        if indexs.shape[0] == 0:
            return np.array([])
        else:
            boxes = rnet_boxes[indexs]
            x1_array = boxes[:, 0]
            y1_array = boxes[:, 1]
            x2_array = boxes[:, 2]
            y2_array = boxes[:, 3]

            w_array = x2_array - x1_array
            h_array = y2_array - y1_array

            offset = offset[indexs]
            confidence = confidence[indexs]
            landmarks = landmarks[indexs]

            x1_real = x1_array + w_array * offset[:, 0]
            y1_real = y1_array + h_array * offset[:, 1]
            x2_real = x2_array + w_array * offset[:, 2]
            y2_real = y2_array + h_array * offset[:, 3]

            landmarks_x1, landmarks_y1 = x1_array + w_array * landmarks[:, 0], y1_array + h_array * landmarks[:, 1]
            landmarks_x2, landmarks_y2 = x1_array + w_array * landmarks[:, 2], y1_array + h_array * landmarks[:, 3]
            landmarks_x3, landmarks_y3 = x1_array + w_array * landmarks[:, 4], y1_array + h_array * landmarks[:, 5]
            landmarks_x4, landmarks_y4 = x1_array + w_array * landmarks[:, 6], y1_array + h_array * landmarks[:, 7]
            landmarks_x5, landmarks_y5 = x1_array + w_array * landmarks[:, 8], y1_array + h_array * landmarks[:, 9]

            boxes = np.stack([
                x1_real, y1_real, x2_real, y2_real, confidence[:, 0],
                landmarks_x1, landmarks_y1, landmarks_x2, landmarks_y2,
                landmarks_x3, landmarks_y3, landmarks_x4, landmarks_y4,
                landmarks_x5, landmarks_y5
            ],
                             axis=1)
            # keep only boxes whose five landmarks all fall inside the box
            empty_box = []
            for box in boxes:
                xs, ys = box[5::2], box[6::2]  # landmark xs and ys
                if (xs > box[0]).all() and (xs < box[2]).all() \
                        and (ys > box[1]).all() and (ys < box[3]).all():
                    empty_box.append(box)
            if len(empty_box) == 0:
                return np.array([])
            boxes = np.stack(empty_box)
        # box = np.stack(boxes)
        return utils.NMS(boxes, 0.3, isMin=True)
    def detectFace(self, img, threshold):
        #-----------------------------#
        #   normalize
        #-----------------------------#
        copy_img = (img.copy() - 127.5) / 127.5
        origin_h, origin_w, _ = copy_img.shape
        #-----------------------------#
        #   compute the scale ratio for
        #   each pyramid level of the input
        #-----------------------------#
        scales = utils.calculateScales(img)  # array of scale ratios

        out = []
        #-----------------------------#
        #   rough face boxes
        #   pnet stage
        #-----------------------------#
        for scale in scales:  # loop over the pyramid scales
            hs = int(origin_h * scale)  # scale by the ratio
            ws = int(origin_w * scale)
            scale_img = cv2.resize(copy_img, (ws, hs))
            inputs = scale_img.reshape(1, *scale_img.shape)  # pnet input at each scale
            output = self.Pnet.predict(inputs)  # pnet output: classifier and bbox_regress
            out.append(output)

        image_num = len(scales)
        rectangles = []
        for i in range(image_num):
            # probability that a face is present
            cls_prob = out[i][0][0][:, :, 1]
            # the positions of the corresponding boxes
            roi = out[i][1][0]

            # height and width of each scaled image
            out_h, out_w = cls_prob.shape
            out_side = max(out_h, out_w)
            #print(cls_prob.shape)
            # decoding step
            rectangle = utils.detect_face_12net(
                cls_prob, roi, out_side, 1 / scales[i], origin_w, origin_h,
                threshold[0])  # face boxes with their face probability: [coords, prob]
            rectangles.extend(rectangle)

        # non-maximum suppression
        rectangles = utils.NMS(rectangles, 0.7)

        if len(rectangles) == 0:
            return rectangles

        #-----------------------------#
        #   refine the face boxes
        #   Rnet stage
        #-----------------------------#
        predict_24_batch = []
        for rectangle in rectangles:
            crop_img = copy_img[int(rectangle[1]):int(rectangle[3]),
                                int(rectangle[0]):int(rectangle[2])]  # candidate boxes from pnet
            scale_img = cv2.resize(crop_img, (24, 24))
            predict_24_batch.append(scale_img)  # rnet input batch

        predict_24_batch = np.array(predict_24_batch)
        out = self.Rnet.predict(predict_24_batch)  # Rnet predictions

        cls_prob = out[0]
        cls_prob = np.array(cls_prob)
        roi_prob = out[1]
        roi_prob = np.array(roi_prob)
        rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                             origin_w, origin_h, threshold[1])

        if len(rectangles) == 0:
            return rectangles

        #-----------------------------#
        #   compute the final face boxes
        #   onet stage
        #-----------------------------#
        predict_batch = []
        for rectangle in rectangles:
            crop_img = copy_img[int(rectangle[1]):int(rectangle[3]),
                                int(rectangle[0]):int(rectangle[2])]
            scale_img = cv2.resize(crop_img, (48, 48))
            predict_batch.append(scale_img)  # candidate boxes from rnet

        predict_batch = np.array(predict_batch)
        output = self.Onet.predict(predict_batch)
        cls_prob = output[0]
        roi_prob = output[1]
        pts_prob = output[2]

        rectangles = utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                             rectangles, origin_w, origin_h,
                                             threshold[2])

        return rectangles
    def detectFace(self, img, threshold):
        #-----------------------------#
        #        normalize
        #-----------------------------#
        copy_img = (img.copy() - 127.5) / 127.5
        origin_h, origin_w, _ = copy_img.shape
        # print("orgin image's shape is: ", origin_h, origin_w)
        #-----------------------------#
        #        compute the scale ratio
        #        for each pyramid level
        #-----------------------------#
        scales = utils.calculateScales(img)

        out = []

        #-----------------------------#
        #        rough face boxes
        #        pnet stage
        #-----------------------------#
        for scale in scales:
            pnet_inputs = []
            pnet_outputs = []
            hs = int(origin_h * scale)
            ws = int(origin_w * scale)
            scale_img = cv2.resize(copy_img, (ws, hs))
            inputs = np.expand_dims(scale_img, 0).astype(np.float32)

            pnet_inputs.append(
                tritonclient.http.InferInput(self.Pnet_inputs[0], inputs.shape,
                                             'FP32'))
            pnet_inputs[0].set_data_from_numpy(inputs, binary_data=True)

            pnet_outputs.append(
                tritonclient.http.InferRequestedOutput(self.Pnet_outputs[0],
                                                       binary_data=True))
            pnet_outputs.append(
                tritonclient.http.InferRequestedOutput(self.Pnet_outputs[1],
                                                       binary_data=True))

            t1 = time.time()
            output = self.triton_client.infer("pnet_tf",
                                              inputs=pnet_inputs,
                                              outputs=pnet_outputs)
            t2 = time.time()
            # print('pnet cost: {}ms'.format(1000*(t2 - t1)))
            # print(output.as_numpy(self.Pnet_outputs[0]).shape)
            # print(output.as_numpy(self.Pnet_outputs[1]).shape)
            output = [
                output.as_numpy(self.Pnet_outputs[0])[0],
                output.as_numpy(self.Pnet_outputs[1])[0]
            ]
            out.append(output)

            # print(out)

        rectangles = []
        #-------------------------------------------------#
        #   loop over the image-pyramid predictions and
        #   pull out each level's class and regression outputs
        #-------------------------------------------------#
        for i in range(len(scales)):
            #------------------------------------------------------------------#
            #   laid out slightly differently than in the video, because the
            #   batch_size dimension was already dropped in the pyramid loop
            #------------------------------------------------------------------#
            cls_prob = out[i][0][:, :, 1]
            roi = out[i][1]
            #--------------------------------------------#
            #   height and width of each scaled image
            #--------------------------------------------#
            out_h, out_w = cls_prob.shape
            out_side = max(out_h, out_w)
            #--------------------------------------------#
            #   decoding step
            #--------------------------------------------#
            rectangle = utils.detect_face_12net(cls_prob, roi, out_side,
                                                1 / scales[i], origin_w,
                                                origin_h, threshold[0])
            rectangles.extend(rectangle)

        #-----------------------------------------#
        #    non-maximum suppression
        #-----------------------------------------#
        rectangles = np.array(utils.NMS(rectangles, 0.7))
        # print(rectangles)

        if len(rectangles) == 0:
            return rectangles

        #-----------------------------------------#
        #    refine the face boxes
        #    Rnet stage
        #-----------------------------------------#
        predict_24_batch = []
        for rectangle in rectangles:
            #--------------------------------------------#
            #    crop from the original image using the rough coordinates
            #--------------------------------------------#
            crop_img = copy_img[int(rectangle[1]):int(rectangle[3]),
                                int(rectangle[0]):int(rectangle[2])]
            #--------------------------------------------#
            #    resize the crop to 24x24
            #--------------------------------------------#
            scale_img = cv2.resize(crop_img, (24, 24))
            predict_24_batch.append(scale_img)

        # print('rnet input: ', np.array(predict_24_batch).shape)

        rnet_inputs = []
        rnet_outputs = []
        rnet_inputs.append(
            tritonclient.http.InferInput(self.Rnet_inputs[0],
                                         np.array(predict_24_batch).shape,
                                         'FP32'))
        rnet_inputs[0].set_data_from_numpy(np.array(predict_24_batch).astype(
            np.float32),
                                           binary_data=True)

        rnet_outputs.append(
            tritonclient.http.InferRequestedOutput(self.Rnet_outputs[0],
                                                   binary_data=True))
        rnet_outputs.append(
            tritonclient.http.InferRequestedOutput(self.Rnet_outputs[1],
                                                   binary_data=True))

        t1 = time.time()
        output = self.triton_client.infer("rnet_tf",
                                          inputs=rnet_inputs,
                                          outputs=rnet_outputs)
        t2 = time.time()
        # print('rnet cost: {}ms'.format(1000*(t2-t1)))
        # print(output.as_numpy(self.Rnet_outputs[0]).shape)
        # print(output.as_numpy(self.Rnet_outputs[1]).shape)
        cls_prob, roi_prob = output.as_numpy(
            self.Rnet_outputs[0]), output.as_numpy(self.Rnet_outputs[1])
        # print('cls_prob is: ')
        # print(cls_prob)
        # print('roi_prob is: ')
        # print(roi_prob)
        #-------------------------------------#
        #   decoding step
        #-------------------------------------#
        rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                             origin_w, origin_h, threshold[1])

        if len(rectangles) == 0:
            return rectangles

        # print(rectangles)

        #-----------------------------#
        #   final face boxes
        #   onet stage
        #-----------------------------#
        predict_batch = []
        for rectangle in rectangles:
            #------------------------------------------#
            #   crop from the original image using the rough coordinates
            #------------------------------------------#
            crop_img = copy_img[int(rectangle[1]):int(rectangle[3]),
                                int(rectangle[0]):int(rectangle[2])]
            #-----------------------------------------------#
            #   resize the crop to 48x48
            #-----------------------------------------------#
            scale_img = cv2.resize(crop_img, (48, 48))
            predict_batch.append(scale_img)

        # print('onet input: ', np.array(predict_batch).shape)
        onet_inputs = []
        onet_outputs = []
        onet_inputs.append(
            tritonclient.http.InferInput(self.Onet_inputs[0],
                                         np.array(predict_batch).shape,
                                         'FP32'))
        onet_inputs[0].set_data_from_numpy(np.array(predict_batch).astype(
            np.float32),
                                           binary_data=True)

        onet_outputs.append(
            tritonclient.http.InferRequestedOutput(self.Onet_outputs[0],
                                                   binary_data=True))
        onet_outputs.append(
            tritonclient.http.InferRequestedOutput(self.Onet_outputs[1],
                                                   binary_data=True))
        onet_outputs.append(
            tritonclient.http.InferRequestedOutput(self.Onet_outputs[2],
                                                   binary_data=True))

        t1 = time.time()
        output = self.triton_client.infer("onet_tf",
                                          inputs=onet_inputs,
                                          outputs=onet_outputs)
        t2 = time.time()
        # print('onet cost: {}ms'.format(1000*(t2-t1)))
        cls_prob, roi_prob, pts_prob = output.as_numpy(
            self.Onet_outputs[0]), output.as_numpy(
                self.Onet_outputs[1]), output.as_numpy(self.Onet_outputs[2])

        #-------------------------------------#
        #   decoding step
        #-------------------------------------#
        # print('cls_prob:')
        # print(cls_prob)
        # print('roi_prob:')
        # print(roi_prob)
        # print('pts_prob:')
        # print(pts_prob)
        rectangles = utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                             rectangles, origin_w, origin_h,
                                             threshold[2])

        return rectangles
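
The Triton variant above assumes a self.triton_client plus resolved input/output tensor names. Creating such a client is a one-liner with tritonclient.http (the URL is illustrative):

import tritonclient.http

# Illustrative client setup for the Triton-based detectFace above.
triton_client = tritonclient.http.InferenceServerClient(url="localhost:8000")
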
def detect_face(img, thresholds):
    '''P-Net Prediction'''
    temp = (img.copy() - 127.5) / 127.5
    orig_h, orig_w, orig_c = temp.shape
    # build the list of scale factors for the image pyramid
    scales = utils.calculate_scales(temp)
    Pnet_outputs = []
    t0 = time.time()

    # build the image pyramid and predict each level in turn
    for scale in scales:
        scale_h = int(orig_h * scale)
        scale_w = int(orig_w * scale)
        scaled_img = cv2.resize(temp, (scale_w, scale_h))  # OpenCV takes width first
        input_img = scaled_img.reshape(
            1, *scaled_img.shape)  # reshape to (1, scale_h, scale_w, orig_c)
        pred = Pnet.predict(
            input_img
        )  # pred is a list of 2 arrays with the shapes (1, ?, ?, 2) & (1, ?, ?, 4)
        Pnet_outputs.append(pred)
    img_num = len(scales)

    rectangles_list = []
    for i in range(img_num):
        prob = Pnet_outputs[i][0][0][:, :, 1]  # "face" confidence: the (?, ?) slice of the (1, ?, ?, 2) output
        roi = Pnet_outputs[i][1][0]  # box-offset ratios: the (?, ?, 4) slice of the (1, ?, ?, 4) output

        out_h, out_w = prob.shape  # each cell is the confidence that a 12 x 12 window contains a face
        out_side = max(out_h, out_w)  # ???

        prob = np.swapaxes(prob, 0, 1)
        roi = np.swapaxes(roi, 0, 2)  # shape becomes (4, ?, ?)
        rectangles = utils.pnet_detect_face(prob, roi, out_side, 1 / scales[i],
                                            orig_w, orig_h, thresholds[0])
        rectangles_list.extend(rectangles)  # rectangles: (num, 5) rows of (x1, y1, x2, y2, score)

    rectangles_list = utils.NMS(rectangles_list, 0.7, 'iou')

    t1 = time.time()
    print("Time for P-Net is " + str(t1 - t0))

    if len(rectangles_list) == 0:
        return rectangles_list
    '''R-Net Prediction'''
    cropping_count = 0  # number of crops taken from this image
    Rnet_inputs = []

    for rectangle in rectangles_list:
        cropped_img = temp[int(rectangle[1]):int(rectangle[3]),
                           int(rectangle[0]):int(rectangle[2])]
        scaled_img = cv2.resize(cropped_img, (24, 24))
        Rnet_inputs.append(scaled_img)
        cropping_count += 1

    Rnet_inputs = np.array(Rnet_inputs)
    Rnet_outputs = Rnet.predict(Rnet_inputs)
    prob = Rnet_outputs[0]
    roi = Rnet_outputs[1]
    prob = np.array(prob)
    roi = np.array(roi)

    rectangles_list = utils.rnet_detect_face(prob, roi, rectangles_list,
                                             orig_w, orig_h, thresholds[1])

    t2 = time.time()
    print("Time for R-Net is " + str(t2 - t1))

    if len(rectangles_list) == 0:
        return rectangles_list
    '''O-Net Prediction'''
    cropping_count = 0
    Onet_inputs = []

    for rectangle in rectangles_list:
        cropped_img = temp[int(rectangle[1]):int(rectangle[3]),
                           int(rectangle[0]):int(rectangle[2])]
        scaled_img = cv2.resize(cropped_img, (48, 48))
        Onet_inputs.append(scaled_img)
        cropping_count += 1

    Onet_inputs = np.array(Onet_inputs)
    Onet_outputs = Onet.predict(Onet_inputs)
    prob = Onet_outputs[0]
    roi = Onet_outputs[1]
    pts = Onet_outputs[2]

    rectangles = utils.onet_detect_face(prob, roi, pts, rectangles_list,
                                        orig_w, orig_h, thresholds[2])

    t3 = time.time()
    print("Time for O-Net is " + str(t3 - t2))

    return rectangles
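
A minimal driver for the standalone detect_face above, assuming Pnet, Rnet, and Onet Keras models are already loaded at module scope as the function expects (file names and thresholds are illustrative):

import cv2

img = cv2.imread("test.jpg")  # illustrative input image
rectangles = detect_face(img, thresholds=[0.5, 0.6, 0.7])  # illustrative thresholds
for rect in rectangles:
    x1, y1, x2, y2 = (int(v) for v in rect[:4])
    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.imwrite("result.jpg", img)
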