def detcet(self, img, threshold):
    """Run the Pnet -> Rnet -> Onet cascade on one image and return the kept boxes.

    Args:
        img: Image array whose shape unpacks into (height, width, channels).
        threshold: Indexable holding three values; items 0, 1 and 2 are
            passed to the Pnet, Rnet and Onet decoding helpers respectively.

    Returns:
        The result of ``utils.filter_face_48net`` for the remaining
        candidates. When ``utils.NMS`` or ``utils.filter_face_24net`` leaves
        no candidate, that empty result is returned immediately.
    """
    # Normalisation: shift and scale the pixel values by 127.5.
    normalized = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, _ = normalized.shape

    # Scale factor of every level of the image pyramid.
    # NOTE(review): the helper is spelled ``caculateScales`` here; the name is
    # kept because ``utils`` lives outside this block -- confirm it exists.
    scales = utils.caculateScales(img)

    # ---- Pnet: coarse face boxes, one prediction per pyramid level ----
    pnet_outputs = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        # The target size is handed to cv2.resize as (width, height).
        scale_img = cv2.resize(normalized, (ws, hs))
        # Prepend a batch axis of length 1 before predicting.
        inputs = scale_img.reshape(1, *scale_img.shape)
        pnet_outputs.append(self.Pnet.predict(inputs))

    rectangles = []
    for scale, output in zip(scales, pnet_outputs):
        # Probability that a face is present.
        cls_prob = output[0][0][:, :, 1]
        # Box position that belongs to each probability.
        roi = output[1][0]
        # Height and width of the output map of this pyramid level.
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        # Decode back to coordinates of the original image.
        rectangles.extend(
            utils.detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                    origin_w, origin_h, threshold[0]))

    # Non-maximum suppression.
    rectangles = utils.NMS(rectangles, 0.7)
    # len() is used instead of truthiness in case the helper returns an
    # ndarray -- TODO confirm the return type of utils.NMS.
    if len(rectangles) == 0:
        return rectangles

    # ---- Rnet: refine the boxes a little ----
    # NOTE(review): a box whose integer-truncated width or height is zero
    # would give cv2.resize an empty crop; presumably the utils helpers never
    # emit such boxes -- confirm.
    predict_24_batch = []
    for rectangle in rectangles:
        crop_img = normalized[int(rectangle[1]):int(rectangle[3]),
                              int(rectangle[0]):int(rectangle[2])]
        predict_24_batch.append(cv2.resize(crop_img, (24, 24)))
    out = self.Rnet.predict(np.array(predict_24_batch))
    # Confidence of each candidate.
    cls_prob = np.array(out[0])
    # How to adjust the rectangle of each candidate.
    roi_prob = np.array(out[1])
    rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                         origin_w, origin_h, threshold[1])
    if len(rectangles) == 0:
        return rectangles

    # ---- Onet: final face boxes ----
    predict_batch = []
    for rectangle in rectangles:
        crop_img = normalized[int(rectangle[1]):int(rectangle[3]),
                              int(rectangle[0]):int(rectangle[2])]
        predict_batch.append(cv2.resize(crop_img, (48, 48)))
    out = self.Onet.predict(np.array(predict_batch))
    cls_prob = out[0]
    roi_prob = out[1]
    pts_prob = out[2]
    return utils.filter_face_48net(cls_prob, roi_prob, pts_prob, rectangles,
                                   origin_w, origin_h, threshold[2])
def detectFace(self, img, threshold):
    """Detect faces by cascading Pnet, Rnet and Onet over ``img``.

    Args:
        img: Image array whose shape unpacks into (height, width, channels).
        threshold: Indexable with three entries, consumed in order by the
            Pnet, Rnet and Onet decoding helpers.

    Returns:
        The output of ``utils.filter_face_48net``, or the empty result of
        ``utils.NMS`` / ``utils.filter_face_24net`` when a stage leaves no
        candidate.
    """
    # Normalisation.
    normed = (img.copy() - 127.5) / 127.5
    full_h, full_w, _ = normed.shape

    # Scale factor for every resize of the original input image.
    scales = utils.calculateScales(img)

    # ---- Pnet: coarse face boxes ----
    pyramid_preds = []
    for factor in scales:
        level_h = int(full_h * factor)
        level_w = int(full_w * factor)
        # Size is given to cv2.resize as (width, height).
        level_img = cv2.resize(normed, (level_w, level_h))
        # Add a leading batch axis of length 1.
        pyramid_preds.append(
            self.Pnet.predict(level_img.reshape(1, *level_img.shape)))

    candidates = []
    for factor, pred in zip(scales, pyramid_preds):
        # Probability of a face.
        face_prob = pred[0][0][:, :, 1]
        # Position of the corresponding box.
        roi_map = pred[1][0]
        # Height and width of this scaled level's output map.
        grid_h, grid_w = face_prob.shape
        longest_side = max(grid_h, grid_w)
        # Decoding.
        candidates.extend(
            utils.detect_face_12net(face_prob, roi_map, longest_side,
                                    1 / factor, full_w, full_h, threshold[0]))

    # Non-maximum suppression.
    candidates = utils.NMS(candidates, 0.7)
    if len(candidates) == 0:
        return candidates

    # ---- Rnet: slightly more precise face boxes ----
    crops_24 = np.array([
        cv2.resize(
            normed[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
            (24, 24))
        for box in candidates
    ])
    rnet_pred = self.Rnet.predict(crops_24)
    # Confidence.
    face_prob = np.array(rnet_pred[0])
    # How to adjust the rectangle that belongs to each crop.
    roi_adjust = np.array(rnet_pred[1])
    candidates = utils.filter_face_24net(face_prob, roi_adjust, candidates,
                                         full_w, full_h, threshold[1])
    if len(candidates) == 0:
        return candidates

    # ---- Onet: final face boxes ----
    crops_48 = np.array([
        cv2.resize(
            normed[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
            (48, 48))
        for box in candidates
    ])
    onet_pred = self.Onet.predict(crops_48)
    return utils.filter_face_48net(onet_pred[0], onet_pred[1], onet_pred[2],
                                   candidates, full_w, full_h, threshold[2])
def detectFace(self, img, threshold):
    """Return face boxes for ``img`` using the Pnet, Rnet and Onet stages.

    Args:
        img: Image array whose shape unpacks into (height, width, channels).
        threshold: Indexable of three values; entry 0 goes to the Pnet
            decoder, entry 1 to the Rnet filter and entry 2 to the Onet
            filter.

    Returns:
        The value produced by ``utils.filter_face_48net``. If
        ``utils.NMS`` or ``utils.filter_face_24net`` yields nothing, that
        empty value is returned instead.
    """

    def crop_and_resize(boxes, side):
        """Cut every box out of the normalised image and resize it to a square."""
        patches = []
        for box in boxes:
            top, bottom = int(box[1]), int(box[3])
            left, right = int(box[0]), int(box[2])
            patch = normalized[top:bottom, left:right]
            patches.append(cv2.resize(patch, (side, side)))
        return np.array(patches)

    # Normalization.
    normalized = (img.copy() - 127.5) / 127.5
    img_h, img_w, _ = normalized.shape

    # Zoom proportions applied to the input image.
    scales = utils.calculateScales(img)

    # ---- Pnet ----
    level_outputs = []
    for zoom in scales:
        target_h = int(img_h * zoom)
        target_w = int(img_w * zoom)
        # cv2.resize receives the size as (width, height).
        zoomed = cv2.resize(normalized, (target_w, target_h))
        # Leading batch axis of length 1.
        net_input = zoomed.reshape(1, *zoomed.shape)
        level_outputs.append(self.Pnet.predict(net_input))

    boxes = []
    for level, zoom in enumerate(scales):
        level_output = level_outputs[level]
        # Probability of a face.
        prob_map = level_output[0][0][:, :, 1]
        # Position of the box.
        roi = level_output[1][0]
        # Size of this level's output map.
        map_h, map_w = prob_map.shape
        map_side = max(map_h, map_w)
        # Map the detections back to the original image size.
        decoded = utils.detect_face_12net(prob_map, roi, map_side, 1 / zoom,
                                          img_w, img_h, threshold[0])
        boxes.extend(decoded)

    # Non-maximum suppression.
    boxes = utils.NMS(boxes, 0.7)
    if len(boxes) == 0:
        return boxes

    # ---- Rnet: more accurate positions ----
    rnet_result = self.Rnet.predict(crop_and_resize(boxes, 24))
    # Confidence.
    confidence = np.array(rnet_result[0])
    # Adjustment of the rectangle of every crop.
    adjustment = np.array(rnet_result[1])
    boxes = utils.filter_face_24net(confidence, adjustment, boxes,
                                    img_w, img_h, threshold[1])
    if len(boxes) == 0:
        return boxes

    # ---- Onet ----
    onet_result = self.Onet.predict(crop_and_resize(boxes, 48))
    confidence = onet_result[0]
    adjustment = onet_result[1]
    landmarks = onet_result[2]
    boxes = utils.filter_face_48net(confidence, adjustment, landmarks, boxes,
                                    img_w, img_h, threshold[2])
    return boxes
def detectFace(self, img, threshold):
    """Run the three-stage face detector (Pnet, Rnet, Onet) on ``img``.

    Args:
        img: Image array whose shape unpacks into (height, width, channels).
        threshold: Indexable with three entries, used in order by the Pnet
            decoder, the Rnet filter and the Onet filter.

    Returns:
        The output of ``utils.filter_face_48net``. An empty ndarray is
        returned when NMS keeps nothing after Pnet, and the empty result of
        ``utils.filter_face_24net`` when Rnet keeps nothing.
    """
    # Normalisation.
    scaled_pixels = (img.copy() - 127.5) / 127.5
    src_h, src_w, _ = scaled_pixels.shape

    # Scale factor of every resize of the original input image.
    scales = utils.calculateScales(img)

    # ---- Pnet: coarse face boxes ----
    per_level = []
    for ratio in scales:
        new_h = int(src_h * ratio)
        new_w = int(src_w * ratio)
        # Size goes to cv2.resize as (width, height).
        resized = cv2.resize(scaled_pixels, (new_w, new_h))
        raw = self.Pnet.predict(np.expand_dims(resized, 0))
        # One pyramid image is predicted at a time, so the prediction also
        # covers a single image and the batch axis can be dropped here.
        per_level.append((raw[0][0], raw[1][0]))

    # Walk over the pyramid predictions and pull out the classification and
    # regression result of each level.
    proposals = []
    for ratio, (cls_map, roi) in zip(scales, per_level):
        # The batch axis was already removed in the loop above.
        face_prob = cls_map[:, :, 1]
        # Height and width of this level's output map.
        map_h, map_w = face_prob.shape
        # Decoding.
        proposals.extend(
            utils.detect_face_12net(face_prob, roi, max(map_h, map_w),
                                    1 / ratio, src_w, src_h, threshold[0]))

    # Non-maximum suppression. The result is an ndarray, so emptiness is
    # tested with len() rather than truthiness.
    proposals = np.array(utils.NMS(proposals, 0.7))
    if len(proposals) == 0:
        return proposals

    # ---- Rnet: slightly more precise face boxes ----
    # Crop each coarse box from the normalised image and resize it to 24x24.
    rnet_batch = [
        cv2.resize(
            scaled_pixels[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
            (24, 24))
        for box in proposals
    ]
    face_prob, roi_prob = self.Rnet.predict(np.array(rnet_batch))
    # Decoding.
    proposals = utils.filter_face_24net(face_prob, roi_prob, proposals,
                                        src_w, src_h, threshold[1])
    if len(proposals) == 0:
        return proposals

    # ---- Onet: final face boxes ----
    # Crop each refined box from the normalised image and resize it to 48x48.
    onet_batch = [
        cv2.resize(
            scaled_pixels[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
            (48, 48))
        for box in proposals
    ]
    face_prob, roi_prob, pts_prob = self.Onet.predict(np.array(onet_batch))
    # Decoding.
    return utils.filter_face_48net(face_prob, roi_prob, pts_prob, proposals,
                                   src_w, src_h, threshold[2])
def detectFace(self, img, threshold):
    """Detect faces with the Pnet/Rnet/Onet cascade served through Triton.

    Each stage is sent to ``self.triton_client`` under the model names
    ``"pnet"``, ``"rnet"`` and ``"onet"``. Input and output tensor names are
    read from ``self.Pnet_inputs`` / ``self.Pnet_outputs`` and the matching
    Rnet/Onet attributes.

    Args:
        img: Image array whose shape unpacks into (height, width, channels).
        threshold: Indexable with three entries, used in order by the Pnet
            decoder, the Rnet filter and the Onet filter.

    Returns:
        The output of ``utils.filter_face_48net``. An empty ndarray is
        returned when NMS keeps nothing after Pnet, and the empty result of
        ``utils.filter_face_24net`` when Rnet keeps nothing.
    """

    def infer(model_name, input_name, output_names, batch):
        """Send one float32 batch to a Triton model and return its outputs as arrays."""
        infer_input = tritonclient.http.InferInput(input_name, batch.shape,
                                                   'FP32')
        infer_input.set_data_from_numpy(batch, binary_data=True)
        requested = [
            tritonclient.http.InferRequestedOutput(name, binary_data=True)
            for name in output_names
        ]
        response = self.triton_client.infer(model_name,
                                            inputs=[infer_input],
                                            outputs=requested)
        return [response.as_numpy(name) for name in output_names]

    def crop_batch(boxes, side):
        """Crop every box from the normalised image and stack the resized crops as float32."""
        # NOTE(review): a box whose integer-truncated width or height is zero
        # would give cv2.resize an empty crop; presumably the utils helpers
        # never emit such boxes -- confirm.
        crops = [
            cv2.resize(
                copy_img[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
                (side, side))
            for box in boxes
        ]
        return np.array(crops).astype(np.float32)

    # Normalisation.
    copy_img = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, _ = copy_img.shape

    # Scale factor of every resize of the original input image.
    scales = utils.calculateScales(img)

    # ---- Pnet: coarse face boxes ----
    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        # Size goes to cv2.resize as (width, height).
        scale_img = cv2.resize(copy_img, (ws, hs))
        inputs = np.expand_dims(scale_img, 0).astype(np.float32)
        cls_out, roi_out = infer(
            "pnet", self.Pnet_inputs[0],
            [self.Pnet_outputs[0], self.Pnet_outputs[1]], inputs)
        # One pyramid image per request, so the batch axis is dropped here.
        out.append((cls_out[0], roi_out[0]))

    # Walk over the pyramid predictions and pull out the classification and
    # regression result of each level.
    rectangles = []
    for scale, (cls_map, roi) in zip(scales, out):
        cls_prob = cls_map[:, :, 1]
        # Height and width of this level's output map.
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        # Decoding.
        rectangles.extend(
            utils.detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                    origin_w, origin_h, threshold[0]))

    # Non-maximum suppression. The result is an ndarray, so emptiness is
    # tested with len() rather than truthiness.
    rectangles = np.array(utils.NMS(rectangles, 0.7))
    if len(rectangles) == 0:
        return rectangles

    # ---- Rnet: slightly more precise face boxes (24x24 crops) ----
    cls_prob, roi_prob = infer(
        "rnet", self.Rnet_inputs[0],
        [self.Rnet_outputs[0], self.Rnet_outputs[1]],
        crop_batch(rectangles, 24))
    # Decoding.
    rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                         origin_w, origin_h, threshold[1])
    if len(rectangles) == 0:
        return rectangles

    # ---- Onet: final face boxes (48x48 crops) ----
    cls_prob, roi_prob, pts_prob = infer(
        "onet", self.Onet_inputs[0],
        [self.Onet_outputs[0], self.Onet_outputs[1], self.Onet_outputs[2]],
        crop_batch(rectangles, 48))
    # Decoding.
    return utils.filter_face_48net(cls_prob, roi_prob, pts_prob, rectangles,
                                   origin_w, origin_h, threshold[2])