Esempio n. 1
0
    def detectFace(self, img, threshold):
        """Detect faces in ``img`` with the P-Net -> R-Net -> O-Net cascade.

        Args:
            img: Image array indexed as (height, width, channels).
                Presumably holds 0-255 pixel values, given the 127.5
                normalisation below -- TODO confirm against the caller.
            threshold: Indexable holding three thresholds; items 0, 1 and 2
                are forwarded to the P-Net, R-Net and O-Net decoding helpers.

        Returns:
            The output of ``utils.filter_face_48net``. When a stage leaves
            no candidates, that stage's empty result is returned instead
            (an empty NumPy array after P-Net, the ``filter_face_24net``
            output after R-Net).
        """
        # Normalise a copy of the input image.
        normalized = (img.copy() - 127.5) / 127.5
        origin_h, origin_w, _ = normalized.shape
        print("orgin image's shape is: ", origin_h, origin_w)

        # Scale factor of every level of the image pyramid.
        scales = utils.calculateScales(img)

        def crop_batch(boxes, side):
            """Cut every box out of the normalised image, resized to side x side."""
            return [
                cv2.resize(
                    normalized[int(box[1]):int(box[3]),
                               int(box[0]):int(box[2])],
                    (side, side))
                for box in boxes
            ]

        # ---- P-Net: coarse candidates on every pyramid level ----
        pyramid_out = []
        for scale in scales:
            target_h = int(origin_h * scale)
            target_w = int(origin_w * scale)
            resized = cv2.resize(normalized, (target_w, target_h))
            batch = np.expand_dims(resized, 0).astype(np.float32)
            raw = self.Pnet.run(
                [self.Pnet_outputs[0], self.Pnet_outputs[1]],
                {self.Pnet_inputs[0]: batch})
            # Keep only the first (and only) batch entry of each output.
            pyramid_out.append([raw[0][0], raw[1][0]])

        # Decode the classification / regression maps of each level.
        # The batch dimension was already stripped in the loop above.
        candidates = []
        for scale, (cls_map, box_map) in zip(scales, pyramid_out):
            face_prob = cls_map[:, :, 1]
            map_h, map_w = face_prob.shape
            candidates.extend(
                utils.detect_face_12net(face_prob, box_map,
                                        max(map_h, map_w), 1 / scale,
                                        origin_w, origin_h, threshold[0]))

        # Non-maximum suppression over all pyramid levels.
        rectangles = np.array(utils.NMS(candidates, 0.7))
        if not len(rectangles):
            return rectangles

        # ---- R-Net: refine the candidates on 24x24 crops ----
        rnet_batch = crop_batch(rectangles, 24)
        cls_prob, roi_prob = self.Rnet.run(
            [self.Rnet_outputs[0], self.Rnet_outputs[1]],
            {self.Rnet_inputs[0]: np.array(rnet_batch).astype(np.float32)})
        rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                             origin_w, origin_h, threshold[1])
        if not len(rectangles):
            return rectangles

        # ---- O-Net: final boxes (plus a third output) on 48x48 crops ----
        onet_batch = crop_batch(rectangles, 48)
        cls_prob, roi_prob, pts_prob = self.Onet.run(
            [self.Onet_outputs[0], self.Onet_outputs[1], self.Onet_outputs[2]],
            {self.Onet_inputs[0]: np.array(onet_batch).astype(np.float32)})

        return utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                       rectangles, origin_w, origin_h,
                                       threshold[2])
Esempio n. 2
0
    def detectFace(self, img, threshold):
        """Run the P-Net, R-Net and O-Net stages on ``img`` and return the boxes.

        Args:
            img: Image array indexed as (height, width, channels).
            threshold: Indexable with three thresholds, one per stage
                (index 0 for P-Net, 1 for R-Net, 2 for O-Net decoding).

        Returns:
            The output of ``utils.filter_face_48net``; an empty list when
            P-Net plus NMS yields nothing, or the ``filter_face_24net``
            output when R-Net filtering leaves nothing.
        """
        normalized = (img.copy() - 127.5) / 127.5  # normalise a copy
        origin_h, origin_w, _ = normalized.shape  # original image size
        scales = utils.calculateScales(img)  # pyramid scale factors

        def crop_batch(boxes, side):
            """Crop each box from the normalised image and resize to side x side."""
            crops = []
            for box in boxes:
                top, bottom = int(box[1]), int(box[3])
                left, right = int(box[0]), int(box[2])
                crops.append(
                    cv2.resize(normalized[top:bottom, left:right],
                               (side, side)))
            return crops

        # ---- P-Net: coarse predictions for every pyramid level ----
        pyramid_out = []
        for scale in scales:
            resized = cv2.resize(
                normalized, (int(origin_w * scale), int(origin_h * scale)))
            prediction = self.Pnet.predict(np.expand_dims(resized, 0))
            # Drop the batch dimension of both outputs.
            pyramid_out.append([prediction[0][0], prediction[1][0]])

        # Decode each level into candidate rectangles (coarse coordinates).
        candidates = []
        for scale, level in zip(scales, pyramid_out):
            face_prob = level[0][:, :, 1]
            map_h, map_w = face_prob.shape
            candidates.extend(
                utils.detect_face_12net(face_prob, level[1],
                                        max(map_h, map_w), 1 / scale,
                                        origin_w, origin_h, threshold[0]))

        rectangles = np.array(utils.NMS(candidates, 0.7))  # non-max suppression
        if not len(rectangles):
            return []

        # ---- R-Net: refine candidates on 24x24 crops ----
        cls_prob, roi_prob = self.Rnet.predict(
            np.array(crop_batch(rectangles, 24)))
        rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                             origin_w, origin_h, threshold[1])
        if not len(rectangles):
            return rectangles

        # ---- O-Net: final boxes on 48x48 crops ----
        cls_prob, roi_prob, pts_prob = self.Onet.predict(
            np.array(crop_batch(rectangles, 48)))

        return utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                       rectangles, origin_w, origin_h,
                                       threshold[2])
    def detectFace(self, img, threshold):
        """Detect faces in ``img`` via the P-Net, R-Net and O-Net models.

        Args:
            img: Image array indexed as (height, width, channels).
            threshold: Indexable with three thresholds; entries 0, 1 and 2
                are passed to the P-Net, R-Net and O-Net decoding helpers.

        Returns:
            The output of ``utils.filter_face_48net``, or the empty result
            of an earlier stage (``utils.NMS`` / ``utils.filter_face_24net``)
            when that stage leaves no candidates.
        """
        # Normalise a copy of the input.
        normalized = (img.copy() - 127.5) / 127.5
        origin_h, origin_w, _ = normalized.shape

        # Scale factor for each level of the image pyramid.
        scales = utils.calculateScales(img)

        # ---- P-Net: raw predictions for every pyramid level ----
        pnet_predictions = []
        for scale in scales:
            target_size = (int(origin_w * scale), int(origin_h * scale))
            resized = cv2.resize(normalized, target_size)
            # Prepend a batch dimension of 1.
            batch = resized.reshape((1,) + resized.shape)
            pnet_predictions.append(self.Pnet.predict(batch))

        # Decode each level into [coordinates, score] candidates.
        candidates = []
        for scale, prediction in zip(scales, pnet_predictions):
            face_prob = prediction[0][0][:, :, 1]  # face probability map
            box_map = prediction[1][0]  # matching box regression map
            map_h, map_w = face_prob.shape
            candidates.extend(
                utils.detect_face_12net(face_prob, box_map,
                                        max(map_h, map_w), 1 / scale,
                                        origin_w, origin_h, threshold[0]))

        # Non-maximum suppression.
        rectangles = utils.NMS(candidates, 0.7)
        if not len(rectangles):
            return rectangles

        # ---- R-Net: refine P-Net candidates on 24x24 crops ----
        rnet_batch = np.array([
            cv2.resize(
                normalized[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
                (24, 24))
            for box in rectangles
        ])
        rnet_out = self.Rnet.predict(rnet_batch)
        cls_prob = np.array(rnet_out[0])
        roi_prob = np.array(rnet_out[1])
        rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                             origin_w, origin_h, threshold[1])
        if not len(rectangles):
            return rectangles

        # ---- O-Net: final boxes on 48x48 crops of the R-Net survivors ----
        onet_batch = np.array([
            cv2.resize(
                normalized[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
                (48, 48))
            for box in rectangles
        ])
        onet_out = self.Onet.predict(onet_batch)

        return utils.filter_face_48net(onet_out[0], onet_out[1], onet_out[2],
                                       rectangles, origin_w, origin_h,
                                       threshold[2])
    def detectFace(self, img, threshold):
        """Detect faces in ``img`` with P-Net, R-Net and O-Net served over Triton.

        The three models are queried through ``self.triton_client`` under the
        model names ``"pnet_tf"``, ``"rnet_tf"`` and ``"onet_tf"``.

        Args:
            img: Image array indexed as (height, width, channels).
            threshold: Indexable with three thresholds; entries 0, 1 and 2
                are passed to the P-Net, R-Net and O-Net decoding helpers.

        Returns:
            The output of ``utils.filter_face_48net``. When a stage leaves
            no candidates, that stage's empty result is returned instead
            (an empty NumPy array after P-Net, the ``filter_face_24net``
            output after R-Net).
        """

        def infer(model_name, input_name, output_names, batch):
            """Send one float32 batch to ``model_name`` and return the
            requested outputs as NumPy arrays, in ``output_names`` order."""
            request_input = tritonclient.http.InferInput(
                input_name, batch.shape, 'FP32')
            request_input.set_data_from_numpy(batch, binary_data=True)
            requested = [
                tritonclient.http.InferRequestedOutput(name, binary_data=True)
                for name in output_names
            ]
            response = self.triton_client.infer(model_name,
                                                inputs=[request_input],
                                                outputs=requested)
            return [response.as_numpy(name) for name in output_names]

        def crop_batch(boxes, side):
            """Crop every box from the normalised image, resize it to
            side x side and stack the crops into one float32 array."""
            crops = [
                cv2.resize(
                    copy_img[int(box[1]):int(box[3]),
                             int(box[0]):int(box[2])],
                    (side, side))
                for box in boxes
            ]
            # Convert once; the same array provides both shape and payload.
            return np.array(crops).astype(np.float32)

        # Normalise a copy of the input.
        copy_img = (img.copy() - 127.5) / 127.5
        origin_h, origin_w, _ = copy_img.shape

        # Scale factor for each level of the image pyramid.
        scales = utils.calculateScales(img)

        # ---- P-Net: coarse predictions, one request per pyramid level ----
        out = []
        for scale in scales:
            hs = int(origin_h * scale)
            ws = int(origin_w * scale)
            scale_img = cv2.resize(copy_img, (ws, hs))
            inputs = np.expand_dims(scale_img, 0).astype(np.float32)
            cls_map, roi_map = infer(
                "pnet_tf", self.Pnet_inputs[0],
                [self.Pnet_outputs[0], self.Pnet_outputs[1]], inputs)
            # Strip the batch dimension so the decode loop sees single maps.
            out.append([cls_map[0], roi_map[0]])

        # Decode the classification / regression maps of each level.
        rectangles = []
        for scale, (cls_map, roi) in zip(scales, out):
            cls_prob = cls_map[:, :, 1]
            out_h, out_w = cls_prob.shape
            out_side = max(out_h, out_w)
            rectangles.extend(
                utils.detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                        origin_w, origin_h, threshold[0]))

        # Non-maximum suppression.
        rectangles = np.array(utils.NMS(rectangles, 0.7))
        if len(rectangles) == 0:
            return rectangles

        # ---- R-Net: refine candidates on 24x24 crops ----
        cls_prob, roi_prob = infer(
            "rnet_tf", self.Rnet_inputs[0],
            [self.Rnet_outputs[0], self.Rnet_outputs[1]],
            crop_batch(rectangles, 24))
        rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                             origin_w, origin_h, threshold[1])
        if len(rectangles) == 0:
            return rectangles

        # ---- O-Net: final boxes on 48x48 crops ----
        cls_prob, roi_prob, pts_prob = infer(
            "onet_tf", self.Onet_inputs[0],
            [self.Onet_outputs[0], self.Onet_outputs[1],
             self.Onet_outputs[2]],
            crop_batch(rectangles, 48))

        rectangles = utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                             rectangles, origin_w, origin_h,
                                             threshold[2])

        return rectangles
def detectFace(img, threshold):
    """Detect faces in ``img`` using P-Net candidates refined directly by O-Net.

    This variant has no R-Net stage: the P-Net candidates that survive NMS
    are cropped to 48x48 and passed straight to O-Net. The models are the
    ``Pnet`` and ``Onet`` objects resolved from the enclosing scope.

    Args:
        img: Image array indexed as (height, width, channels).
        threshold: Accepted but not read; both decoding steps use a
            hard-coded 0.7.
            NOTE(review): confirm whether ignoring this is intentional.

    Returns:
        The output of ``utils.filter_face_48net``, or the empty
        ``utils.NMS`` result when P-Net yields no candidates.
    """
    # Normalise a copy of the input.
    normalized = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, _ = normalized.shape

    # Scale factor for each level of the image pyramid.
    scales = utils.calculateScales(img)

    # ---- P-Net: raw predictions for every pyramid level ----
    pnet_predictions = []
    for scale in scales:
        target_size = (int(origin_w * scale), int(origin_h * scale))
        resized = cv2.resize(normalized, target_size)
        # Prepend a batch dimension of 1.
        batch = resized.reshape((1,) + resized.shape)
        pnet_predictions.append(Pnet.predict(batch))

    # Decode each level into candidate rectangles.
    candidates = []
    for scale, prediction in zip(scales, pnet_predictions):
        face_prob = prediction[0][0][:, :, 1]  # face probability map
        box_map = prediction[1][0]  # matching box regression map
        map_h, map_w = face_prob.shape
        candidates.extend(
            utils.detect_face_12net(face_prob, box_map, max(map_h, map_w),
                                    1 / scale, origin_w, origin_h, 0.7))

    # Non-maximum suppression.
    rectangles = utils.NMS(candidates, 0.7)
    if not len(rectangles):
        return rectangles

    # ---- O-Net: final boxes on 48x48 crops ----
    onet_batch = np.array([
        cv2.resize(
            normalized[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
            (48, 48))
        for box in rectangles
    ])
    onet_out = Onet.predict(onet_batch)

    return utils.filter_face_48net(onet_out[0], onet_out[1], onet_out[2],
                                   rectangles, origin_w, origin_h, 0.7)