def o(self):
    """Run the ONet stage and return the final detections.

    Steps:
      1. Fetch candidate crops and their prior boxes from ``self.r()``.
      2. Run ``self.onet`` on the GPU without gradient tracking.
      3. Keep candidates whose confidence is at least 0.999.
      4. Decode offsets and landmarks against the priors via
         ``utils.transform``.
      5. Suppress overlapping boxes with ``utils.NMS``
         (threshold 0.4, ``ismin=True``).

    Returns:
        Whatever ``utils.NMS`` returns for rows laid out as
        ``[4 box values, confidence, landmark values...]``.
    """
    start_time = datetime.datetime.now()
    data, prior = self.r()
    with torch.no_grad():
        confi, offset = self.onet(data.cuda())
    confi = confi.cpu().numpy().flatten()
    offset = offset.cpu().numpy()

    # Build the confidence mask once and apply it to all three arrays.
    keep = confi >= 0.999
    offset, prior, confi = offset[keep], prior[keep], confi[keep]

    # The first four columns are box offsets, the rest are landmark offsets.
    offset, landmarks = offset[:, :4], offset[:, 4:]
    offset, landmarks = utils.transform(offset, landmarks, prior)
    boxes = np.hstack((offset, np.expand_dims(confi, axis=1), landmarks))
    boxes = utils.NMS(boxes, threshold=0.4, ismin=True)

    # ``timedelta.microseconds`` is only the sub-second component, so it
    # under-reports any run of one second or more. Dividing by a 1 ms
    # timedelta yields the full elapsed time in milliseconds.
    elapsed_ms = (datetime.datetime.now() - start_time) / datetime.timedelta(
        milliseconds=1)
    print("ONet cost {}ms".format(elapsed_ms))
    return boxes
def r(self):
    """RNet stage: refine PNet candidates and prepare ONet's inputs.

    Takes the crops and priors produced by ``self.p()``, scores them with
    ``self.rnet`` on the GPU, keeps those with confidence >= 0.99, decodes
    them with ``utils.transform``, applies ``utils.NMS`` (threshold 0.6,
    ``ismin=False``), then crops 48-pixel squares from ``self.image`` for
    the next stage. The surviving boxes are drawn on ``self.test_img``.

    Returns:
        ``(o_data, o_prior)``: the stacked crop tensor and the stacked
        prior array produced from ``utils.crop_to_square``.
    """
    t_begin = time.time()
    crops, priors = self.p()
    with torch.no_grad():
        scores, regress = self.rnet(crops.cuda())
    scores = scores.cpu().numpy().flatten()
    regress = regress.cpu().numpy()

    # Confidence filter, applied with a single shared mask.
    keep = scores >= 0.99
    regress = regress[keep]
    priors = priors[keep]
    scores = scores[keep]

    # Columns 0-3 are box offsets; the remaining columns are landmarks.
    box_part = regress[:, :4]
    landmark_part = regress[:, 4:]
    box_part, landmark_part = utils.transform(box_part, landmark_part, priors)

    detections = np.hstack(
        (box_part, np.expand_dims(scores, axis=1), landmark_part))
    detections = utils.NMS(detections, threshold=0.6, ismin=False)

    # Only the box + confidence columns are needed for cropping.
    next_crops, next_priors = utils.crop_to_square(
        detections[:, :5], 48, self.image)
    next_priors = np.stack(next_priors, axis=0)
    next_crops = torch.stack(next_crops, dim=0)

    t_end = time.time()
    print("RNet create {} candidate items\ncost {}s!".format(
        next_crops.size(0), t_end - t_begin))
    utils.draw(detections, self.test_img, "RNet")
    return next_crops, next_priors
def p(self):
    """PNet stage: scan the image pyramid and build RNet's inputs.

    For every pyramid level (while the shorter side of ``self.img`` is
    above 12) the level is run through ``self.pnet``, cells with
    confidence >= 0.9 are mapped back to original-image coordinates,
    decoded with ``utils.transform``, filtered with ``utils.NMS``
    (threshold 0.7) and cropped to 24-pixel squares from ``self.image``.
    The loop stops early when a level yields no boxes. All collected boxes
    are finally drawn on ``self.test_img``.

    Note: ``self.img`` is replaced by ``self.pyramid()`` on each iteration.

    Returns:
        ``(r_data, r_prior)``: stacked crop tensor and stacked prior array.
    """
    prior_acc, crop_acc = [], []  # RNet priors / RNet input crops
    drawn = []  # every box kept at any level, for drawing
    level = 0
    t_begin = time.time()

    while min(self.img.size) > 12:
        # Scale of this level relative to the original image; the original
        # comment states 0.707 halves the area per level.
        ratio = 0.707 ** level
        net_in = tf.ToTensor()(self.img).unsqueeze(dim=0) - 0.5
        with torch.no_grad():
            scores, regress = self.pnet(net_in.cuda())

        fmap_w = offset_w = regress.size(3)  # feature-map width
        # Move channels last so each row holds one cell's 14 values.
        scores = scores.permute(0, 2, 3, 1).reshape(-1).cpu().numpy()
        regress = regress.permute(0, 2, 3, 1).reshape((-1, 14)).cpu().numpy()

        flat_idx = np.arange(len(regress)).reshape(-1, 1)
        keep = scores >= 0.9
        regress, flat_idx, scores = regress[keep], flat_idx[keep], scores[keep]

        # flat index -> (row, col) on the feature map
        row_idx = flat_idx // offset_w
        col_idx = flat_idx % fmap_w

        # top-left = index * stride(2); bottom-right = top-left + 12,
        # both divided by the level ratio to land on the original image.
        left = col_idx * 2 / ratio
        top = row_idx * 2 / ratio
        right = (col_idx * 2 + 12) / ratio
        bottom = (row_idx * 2 + 12) / ratio
        level_prior = np.hstack((left, top, right, bottom))

        box_part, landmark_part = regress[:, :4], regress[:, 4:]
        box_part, landmark_part = utils.transform(
            box_part, landmark_part, level_prior)
        detections = np.hstack(
            (box_part, np.expand_dims(scores, axis=1), landmark_part))
        detections = utils.NMS(detections, threshold=0.7, ismin=False)

        drawn.extend(detections.tolist())
        if detections.shape[0] == 0:
            break

        crops, priors = utils.crop_to_square(
            detections[:, :5], 24, self.image)
        prior_acc.extend(priors)
        crop_acc.extend(crops)

        self.img = self.pyramid()  # next pyramid level
        level += 1

    # Repack the accumulated lists as one numpy array and one tensor.
    r_prior = np.stack(prior_acc, axis=0)
    r_data = torch.stack(crop_acc, dim=0)
    t_end = time.time()
    print("PNet create {} candidate items\ncost {}s!".format(
        r_data.size(0), t_end - t_begin))
    utils.draw(np.stack(drawn, axis=0), self.test_img, "PNet")
    return r_data, r_prior
def p(self):
    """PNet stage: scan the image pyramid and build RNet's inputs.

    For every pyramid level (while the shorter side of ``self.img`` is
    above 12) the level is run through ``self.pnet``; cells whose first
    confidence channel exceeds 0.9 are mapped back to original-image
    coordinates, decoded with ``utils.transform``, filtered with
    ``utils.NMS`` (threshold 0.7) and cropped to 24-pixel squares from
    ``self.image``. The loop stops early when a level yields no boxes.

    Note: ``self.img`` is replaced by ``self.pyramid()`` on each iteration.

    Returns:
        ``(r_data, r_prior)``: stacked crop tensor and stacked prior array.
    """
    start_time = datetime.datetime.now()
    r_prior, r_data = [], []  # RNet priors / RNet input crops
    count = 0
    while min(self.img.size) > 12:
        # Scale of this level; the original comment states 0.707 halves
        # the area relative to the previous level.
        scal = 0.707 ** count
        net_input = tf.ToTensor()(self.img).unsqueeze(dim=0) - 0.5
        with torch.no_grad():
            confi, offset = self.pnet(net_input.cuda())

        # Swap axis 1 with the last axis so the channel values sit last;
        # the two middle axes are thereby reversed relative to the output.
        confi, offset = confi.transpose(1, -1), offset.transpose(1, -1)
        mask = confi[..., 0] > 0.9
        confi = confi[mask].cpu().numpy()
        offset = offset[mask].cpu().numpy()
        index = mask.nonzero().cpu().numpy()
        # Columns 1 and 2 of the nonzero indices are used as x and y.
        x_index, y_index = index[:, 1:2], index[:, 2:3]

        # top-left = index * stride(2); bottom-right = top-left + 12,
        # both divided by the level scale.
        x1 = x_index * 2 / scal
        y1 = y_index * 2 / scal
        x2 = (x_index * 2 + 12) / scal
        y2 = (y_index * 2 + 12) / scal
        p_prior = np.hstack([x1, y1, x2, y2])

        offset, landmarks = offset[:, :4], offset[:, 4:]
        offset, landmarks = utils.transform(offset, landmarks, p_prior)
        boxes = np.hstack((offset, confi, landmarks))
        boxes = utils.NMS(boxes, threshold=0.7, ismin=False)
        if boxes.shape[0] == 0:
            break

        data, prior = utils.crop_to_square(boxes[:, :5], 24, self.image)
        r_prior.extend(prior)
        r_data.extend(data)
        self.img = self.pyramid()
        count += 1

    r_prior = np.stack(r_prior, axis=0)
    r_data = torch.stack(r_data, dim=0)

    # ``timedelta.microseconds`` is only the sub-second component, so it
    # under-reports any run of one second or more. Dividing by a 1 ms
    # timedelta yields the full elapsed time in milliseconds.
    elapsed_ms = (datetime.datetime.now() - start_time) / datetime.timedelta(
        milliseconds=1)
    print("PNet cost {}ms".format(elapsed_ms))
    return r_data, r_prior
def rnet_detect(self, image, pnet_boxes):
    """Refine P-Net boxes with R-Net.

    Args:
        image: image object supporting ``crop((x1, y1, x2, y2))`` and
            ``resize`` (presumably a PIL image; confirm against caller).
        pnet_boxes: candidate boxes from the P-Net stage; the first four
            values of each row are used as ``x1, y1, x2, y2``.

    Returns:
        ``np.array([])`` when there are no input boxes or none scores above
        0.93; otherwise the result of ``utils.NMS`` over rows
        ``[x1, y1, x2, y2, confidence]``.
    """
    # Nothing to refine: without this guard ``torch.stack`` below would be
    # called with an empty list and raise.
    if len(pnet_boxes) == 0:
        return np.array([])

    pnet_boxes = utils.convertToRectangle(pnet_boxes)

    # Crop every candidate, resize it to R-Net's 24x24 input and convert it
    # with ``self.trans``.
    img_dataset = []
    for pnet_box in pnet_boxes:
        corners = tuple(int(pnet_box[k]) for k in range(4))
        patch = image.crop(corners).resize((24, 24))
        img_dataset.append(self.trans(patch))
    img_dataset = torch.stack(img_dataset).to(self.device)

    with torch.no_grad():
        confidence, offset, _ = self.rnet(img_dataset)
    confidence = confidence.cpu().detach().numpy()
    offset = offset.cpu().detach().numpy()

    # Row indices of the candidates that pass the confidence threshold.
    indexs, _ = np.where(confidence > 0.93)
    if indexs.shape[0] == 0:
        return np.array([])

    boxes = pnet_boxes[indexs]
    x1_array, y1_array = boxes[:, 0], boxes[:, 1]
    x2_array, y2_array = boxes[:, 2], boxes[:, 3]
    w_array = x2_array - x1_array
    h_array = y2_array - y1_array

    offset = offset[indexs]
    confidence = confidence[indexs]

    # Offsets are fractions of the box width/height added to each corner.
    x1_real = x1_array + w_array * offset[:, 0]
    y1_real = y1_array + h_array * offset[:, 1]
    x2_real = x2_array + w_array * offset[:, 2]
    y2_real = y2_array + h_array * offset[:, 3]

    box = np.stack(
        [x1_real, y1_real, x2_real, y2_real, confidence[:, 0]], axis=1)
    return utils.NMS(box, 0.3)
def pnet_detect(self, image):
    """Run P-Net over a shrinking image pyramid and collect NMS survivors.

    The image is first scaled by 0.7 and then repeatedly by a further 0.7
    until the shorter side of the scaled size is 12 or less. At every level
    cells with confidence above 0.8 are mapped back to image coordinates by
    ``self.backToImage`` and filtered with ``utils.NMS`` (threshold 0.3).

    Args:
        image: object exposing ``size`` as ``(w, h)`` and ``resize((w, h))``
            (presumably a PIL image; confirm against caller).

    Returns:
        A stacked array of all surviving boxes, or ``np.array([])`` when no
        level produced any.
    """
    boxes_nms_all = []  # boxes that survived NMS, across all levels
    w, h = image.size
    short_side = np.minimum(w, h)

    # Starting scale; each later level multiplies it by 0.7.
    scale = 0.7
    while True:
        w_ = int(w * scale)
        h_ = int(h * scale)
        min_length = np.minimum(w_, h_)
        if min_length <= 12:
            break
        scale_new = min_length / short_side
        # Each level is resized from the previous (already resized) image.
        image = image.resize((w_, h_))

        # Add the batch dimension expected by the network.
        img_data = self.trans(image).to(self.device).unsqueeze(0)
        with torch.no_grad():
            confidence, offset, _ = self.pnet(img_data)
        confidence = confidence[0][0].cpu().detach()
        offset = offset[0].cpu().detach()

        # Indices of the cells whose confidence passes the threshold.
        indexs = torch.nonzero(torch.gt(confidence, 0.8))
        if indexs.shape[0] != 0:
            # ``np.float`` was removed in NumPy 1.24; the builtin ``float``
            # is the exact type it used to alias.
            boxes = self.backToImage(
                np.array(indexs, dtype=float), offset, scale_new, confidence)
            boxes_nms_all.extend(utils.NMS(boxes, 0.3))

        scale *= 0.7

    if len(boxes_nms_all) == 0:
        return np.array([])
    return np.stack(boxes_nms_all)
def o(self):
    """Run the ONet stage, report the detection count and draw the boxes.

    Steps:
      1. Fetch candidate crops and their prior boxes from ``self.r()``.
      2. Run ``self.onet`` on the GPU.
      3. Keep candidates whose confidence is at least 0.999.
      4. Decode offsets and landmarks against the priors via
         ``utils.transform``.
      5. Suppress overlapping boxes with ``utils.NMS``
         (threshold 0.4, ``ismin=True``).
      6. Print the number of boxes and draw them on ``self.test_img``.

    Returns:
        None.
    """
    data, prior = self.r()
    # Inference only: skip autograd bookkeeping, as the other stages do,
    # instead of detaching afterwards through ``.data``.
    with torch.no_grad():
        confi, offset = self.onet(data.cuda())
    confi = confi.cpu().numpy().flatten()
    offset = offset.cpu().numpy()

    # Build the confidence mask once and apply it to all three arrays.
    keep = confi >= 0.999
    offset, prior, confi = offset[keep], prior[keep], confi[keep]

    # The first four columns are box offsets, the rest are landmark offsets.
    offset, landmarks = offset[:, :4], offset[:, 4:]
    offset, landmarks = utils.transform(offset, landmarks, prior)

    # Join boxes, confidence and landmarks into one row per detection for NMS.
    boxes = np.hstack((offset, np.expand_dims(confi, axis=1), landmarks))
    boxes = utils.NMS(boxes, threshold=0.4, ismin=True)
    print("ONet create {} candidate items".format(boxes.shape[0]))
    utils.draw(boxes, self.test_img, "ONet")
def pnet_prediction(img, PNet, thresholds):
    """Run P-Net over an image pyramid and return the NMS-filtered boxes.

    Args:
        img: image array with three dimensions ``(h, w, c)``; it is copied
            and divided by 255 before use.
        PNet: model exposing ``predict``; each prediction is indexed as
            ``pred[0][0]`` (confidence map) and ``pred[1][0]`` (box offsets).
        thresholds: indexable; only ``thresholds[0]`` is used here.

    Returns:
        The result of ``utils.NMS(..., 0.7, 'iou')`` over all pyramid levels.
    """
    normalized = img.copy() / 255.
    orig_h, orig_w, _ = normalized.shape

    # Scale factors used to build the image pyramid.
    scales = utils.calculate_scales(normalized)

    t0 = time.time()

    # Phase 1: predict on every pyramid level.
    level_preds = []
    for factor in scales:
        # OpenCV takes the target size as (width, height).
        target_size = (int(orig_w * factor), int(orig_h * factor))
        resized = cv2.resize(normalized, target_size)
        # Add a leading batch axis: (1, scale_h, scale_w, channels).
        level_preds.append(PNet.predict(resized.reshape(1, *resized.shape)))

    # Phase 2: decode each level's prediction into rectangles.
    candidates = []
    for factor, pred in zip(scales, level_preds):
        face_prob = pred[0][0][:, :, 0]  # channel 0 of the confidence output
        box_offsets = pred[1][0]
        map_h, map_w = face_prob.shape
        longest_side = max(map_h, map_w)
        # The decoder is fed axis-swapped maps.
        face_prob = np.swapaxes(face_prob, 0, 1)
        box_offsets = np.swapaxes(box_offsets, 0, 2)
        candidates.extend(
            utils.pnet_detect_face(face_prob, box_offsets, longest_side,
                                   1 / factor, orig_w, orig_h, thresholds[0]))

    candidates = utils.NMS(candidates, 0.7, 'iou')
    t1 = time.time()
    print("Inference time for P-Net is " + str(t1 - t0))
    return candidates
batch_size=1, shuffle=True, num_workers=2) result = [] for data in iter(test_data): data = data.float() output = model(data) output = torch.max(output, 1) result.append(output) result = np.asarray(result) input = np.asarray(input).reshape(-1, 1) candidate = np.asarray(candidate).reshape(-1, 4) nms_sum = np.concatenate((candidate, input, result), axis=1) # NMS regions = utils.NMS(nms_sum) # draw rectangles on the original image fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6)) ax.imshow(image) for x, y, w, h, label in regions: print(x, y, w, h) rect = mpatches.Rectangle((x, y), w, h, fill=False, edgecolor='red', linewidth=1) ax.add_patch(rect) plt.annotate(label, xy=(x, y))
def detectFace(self, img, threshold):
    """Three-stage (P-Net, R-Net, O-Net) face detection via session ``run``.

    Args:
        img: image array with three dimensions ``(h, w, c)``; it is
            normalized as ``(img - 127.5) / 127.5`` on a copy.
        threshold: indexable; ``threshold[0]``, ``threshold[1]`` and
            ``threshold[2]`` are passed to the P-, R- and O-Net decoders.

    Returns:
        The rectangles from ``utils.filter_face_48net``, or the empty
        intermediate result as soon as a stage leaves no rectangles.
    """
    # Normalization
    normalized = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, _ = normalized.shape
    print("orgin image's shape is: ", origin_h, origin_w)

    def crop_batch(boxes, side):
        # Cut each box out of the normalized image, resize it to
        # side x side and pack everything into one float32 batch.
        patches = []
        for box in boxes:
            region = normalized[int(box[1]):int(box[3]),
                                int(box[0]):int(box[2])]
            patches.append(cv2.resize(region, (side, side)))
        return np.array(patches).astype(np.float32)

    # Scale factor of every pyramid level.
    scales = utils.calculateScales(img)

    # P-Net: coarse boxes. Predict on every level first ...
    level_outputs = []
    for factor in scales:
        resized = cv2.resize(
            normalized, (int(origin_w * factor), int(origin_h * factor)))
        batch = np.expand_dims(resized, 0).astype(np.float32)
        raw = self.Pnet.run([self.Pnet_outputs[0], self.Pnet_outputs[1]],
                            {self.Pnet_inputs[0]: batch})
        # Drop the batch dimension right away.
        level_outputs.append([raw[0][0], raw[1][0]])

    # ... then decode each level's classification and regression maps.
    rectangles = []
    for factor, (cls_map, roi) in zip(scales, level_outputs):
        cls_prob = cls_map[:, :, 1]
        out_h, out_w = cls_prob.shape
        rectangles.extend(
            utils.detect_face_12net(cls_prob, roi, max(out_h, out_w),
                                    1 / factor, origin_w, origin_h,
                                    threshold[0]))

    # Non-maximum suppression
    rectangles = np.array(utils.NMS(rectangles, 0.7))
    if len(rectangles) == 0:
        return rectangles

    # R-Net: refine the boxes on 24x24 crops of the normalized image.
    cls_prob, roi_prob = self.Rnet.run(
        [self.Rnet_outputs[0], self.Rnet_outputs[1]],
        {self.Rnet_inputs[0]: crop_batch(rectangles, 24)})
    rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                         origin_w, origin_h, threshold[1])
    if len(rectangles) == 0:
        return rectangles

    # O-Net: final boxes on 48x48 crops.
    cls_prob, roi_prob, pts_prob = self.Onet.run(
        [self.Onet_outputs[0], self.Onet_outputs[1], self.Onet_outputs[2]],
        {self.Onet_inputs[0]: crop_batch(rectangles, 48)})
    rectangles = utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                         rectangles, origin_w, origin_h,
                                         threshold[2])
    return rectangles
def evaluate(model, path, iou_thres, conf_thres, nms_thres, image_size,
             batch_size, num_workers, device):
    """Evaluate a detection model on the dataset listed at ``path``.

    Args:
        model: model to evaluate; switched to eval mode here.
        path: dataset list passed to ``datasets.ListDataset``.
        iou_thres: IoU threshold passed to ``utils.get_batch_statistics``.
        conf_thres: confidence threshold passed to ``utils.NMS``.
        nms_thres: NMS threshold passed to ``utils.NMS``.
        image_size: size passed to the dataset; also used to rescale the
            target boxes.
        batch_size: batch size of the data loader.
        num_workers: worker count of the data loader.
        device: device the image batches are moved to.

    Returns:
        ``(precision, recall, AP, f1, ap_class, inference_time, fps)`` where
        ``inference_time`` is in milliseconds per dataset item. Both timing
        values are 0.0 when nothing was timed.
    """
    # Put the model into evaluation mode.
    model.eval()

    # Dataset and data loader.
    dataset = datasets.ListDataset(path, image_size, augment=False,
                                   multiscale=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             collate_fn=dataset.collate_fn)

    labels = []
    sample_metrics = []  # List[Tuple] -> [(TP, confs, pred)]
    entire_time = 0.0
    for _, images, targets in tqdm.tqdm(dataloader, desc='Evaluate method',
                                        leave=False):
        if targets is None:
            continue

        # Extract labels
        labels.extend(targets[:, 1].tolist())

        # Rescale targets
        targets[:, 2:] = utils.xywh2xyxy(targets[:, 2:])
        targets[:, 2:] *= image_size

        # Predict objects; a monotonic clock is used for the interval.
        start_time = time.perf_counter()
        with torch.no_grad():
            images = images.to(device)
            outputs = model(images)
            outputs = utils.NMS(outputs, conf_thres, nms_thres)
        entire_time += time.perf_counter() - start_time

        # Compute true positives, predicted scores and predicted labels
        # per batch.
        sample_metrics.extend(
            utils.get_batch_statistics(outputs, targets, iou_thres))

    # Concatenate sample statistics
    if len(sample_metrics) == 0:
        true_positives = np.array([])
        pred_scores = np.array([])
        pred_labels = np.array([])
    else:
        true_positives, pred_scores, pred_labels = [
            np.concatenate(x, 0) for x in zip(*sample_metrics)
        ]

    # Compute AP
    precision, recall, AP, f1, ap_class = utils.ap_per_class(
        true_positives, pred_scores, pred_labels, labels)

    # Compute inference time and fps. Guard both divisions: an empty
    # dataset, or a run where every batch was skipped, would otherwise
    # raise ZeroDivisionError.
    num_items = len(dataset)
    inference_time = entire_time / num_items if num_items else 0.0
    fps = 1 / inference_time if inference_time > 0 else 0.0

    # Export inference time in milliseconds.
    inference_time *= 1000

    return precision, recall, AP, f1, ap_class, inference_time, fps
def detectFace(self, img, threshold):
    """Detect faces with the P-Net, R-Net and O-Net cascade.

    Args:
        img: image array with three dimensions ``(h, w, c)``; it is
            normalized as ``(img - 127.5) / 127.5`` on a copy.
        threshold: indexable; ``threshold[0]``, ``threshold[1]`` and
            ``threshold[2]`` are passed to the P-, R- and O-Net decoders.

    Returns:
        The rectangles from ``utils.filter_face_48net``. An empty list is
        returned when P-Net finds nothing, and the empty R-Net result when
        R-Net filters everything out.
    """
    normalized = (img.copy() - 127.5) / 127.5  # normalization
    origin_h, origin_w, _ = normalized.shape  # original image size
    scales = utils.calculateScales(img)  # pyramid scale factors

    # ---- P-Net: coarse face boxes -------------------------------------
    # Predict on every pyramid level first, keeping only the first item of
    # each output (drops the batch dimension) ...
    level_outputs = []
    for factor in scales:
        resized = cv2.resize(
            normalized, (int(origin_w * factor), int(origin_h * factor)))
        raw = self.Pnet.predict(np.expand_dims(resized, 0))
        level_outputs.append([raw[0][0], raw[1][0]])

    # ... then decode each level into rough rectangles.
    rectangles = []
    for factor, level in zip(scales, level_outputs):
        cls_prob = level[0][:, :, 1]
        out_h, out_w = cls_prob.shape
        longest = max(out_h, out_w)
        roi = level[1]
        rectangles.extend(
            utils.detect_face_12net(cls_prob, roi, longest, 1 / factor,
                                    origin_w, origin_h, threshold[0]))

    rectangles = np.array(utils.NMS(rectangles, 0.7))  # NMS
    if len(rectangles) == 0:
        return []

    # ---- R-Net: refine the boxes --------------------------------------
    # Crop each rough box from the normalized image at 24x24.
    crops_24 = [
        cv2.resize(
            normalized[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
            (24, 24)) for box in rectangles
    ]
    cls_prob, roi_prob = self.Rnet.predict(np.array(crops_24))
    rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                         origin_w, origin_h, threshold[1])
    if len(rectangles) == 0:
        return rectangles

    # ---- O-Net: final boxes plus facial landmark output ---------------
    crops_48 = [
        cv2.resize(
            normalized[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
            (48, 48)) for box in rectangles
    ]
    cls_prob, roi_prob, pts_prob = self.Onet.predict(np.array(crops_48))
    rectangles = utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                         rectangles, origin_w, origin_h,
                                         threshold[2])
    return rectangles
def detectFace(img, threshold):
    """Detect faces with P-Net followed directly by O-Net (no R-Net stage).

    Uses the module-level ``Pnet`` and ``Onet`` models.

    Args:
        img: image array with three dimensions ``(h, w, c)``; it is
            normalized as ``(img - 127.5) / 127.5`` on a copy.
        threshold: accepted but not used; both decoders are called with a
            fixed 0.7.

    Returns:
        The rectangles from ``utils.filter_face_48net``, or the empty NMS
        result when P-Net finds nothing.
    """
    # Normalize
    normalized = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, _ = normalized.shape

    # Scale factor of every pyramid level
    scales = utils.calculateScales(img)

    # P-Net part: rough face boxes. Predict on every level first.
    predictions = []
    for factor in scales:
        target = (int(origin_w * factor), int(origin_h * factor))
        resized = cv2.resize(normalized, target)
        predictions.append(Pnet.predict(resized.reshape(1, *resized.shape)))

    # Decode every level.
    rectangles = []
    for factor, pred in zip(scales, predictions):
        face_prob = pred[0][0][:, :, 1]  # face probability map
        roi = pred[1][0]  # matching box regression
        map_h, map_w = face_prob.shape
        rectangles.extend(
            utils.detect_face_12net(face_prob, roi, max(map_h, map_w),
                                    1 / factor, origin_w, origin_h, 0.7))

    # Non-maximum suppression
    rectangles = utils.NMS(rectangles, 0.7)
    if len(rectangles) == 0:
        return rectangles

    # O-Net part: final boxes on 48x48 crops of the normalized image.
    crops = []
    for box in rectangles:
        top, bottom = int(box[1]), int(box[3])
        left, right = int(box[0]), int(box[2])
        crops.append(cv2.resize(normalized[top:bottom, left:right], (48, 48)))

    cls_prob, roi_prob, pts_prob = Onet.predict(np.array(crops))[:3]
    return utils.filter_face_48net(cls_prob, roi_prob, pts_prob, rectangles,
                                   origin_w, origin_h, 0.7)
def onet_detect(self, image, rnet_boxes):
    """Refine R-Net boxes with O-Net and attach five landmark points.

    Args:
        image: image object supporting ``crop((x1, y1, x2, y2))`` and
            ``resize`` (presumably a PIL image; confirm against caller).
        rnet_boxes: candidate boxes from the R-Net stage; the first four
            values of each row are used as ``x1, y1, x2, y2``.

    Returns:
        ``np.array([])`` when there are no input boxes, none scores above
        0.99, or none passes the landmark plausibility check; otherwise the
        result of ``utils.NMS(boxes, 0.3, isMin=True)`` over rows
        ``[x1, y1, x2, y2, confidence, lx1, ly1, ..., lx5, ly5]``.
    """
    # Nothing to refine: without this guard ``torch.stack`` below would be
    # called with an empty list and raise.
    if len(rnet_boxes) == 0:
        return np.array([])

    rnet_boxes = utils.convertToRectangle(rnet_boxes)

    # Crop every candidate, resize it to O-Net's 48x48 input and convert it
    # with ``self.trans``.
    img_dataset = []
    for rnet_box in rnet_boxes:
        corners = tuple(int(rnet_box[k]) for k in range(4))
        patch = image.crop(corners).resize((48, 48))
        img_dataset.append(self.trans(patch))
    img_dataset = torch.stack(img_dataset).to(self.device)

    with torch.no_grad():
        confidence, offset, landmarks = self.onet(img_dataset)
    confidence = confidence.cpu().detach().numpy()
    offset = offset.cpu().detach().numpy()
    landmarks = landmarks.cpu().detach().numpy()

    # Row indices of the candidates that pass the confidence threshold.
    indexs, _ = np.where(confidence > 0.99)
    if indexs.shape[0] == 0:
        return np.array([])

    boxes = rnet_boxes[indexs]
    x1_array, y1_array = boxes[:, 0], boxes[:, 1]
    x2_array, y2_array = boxes[:, 2], boxes[:, 3]
    w_array = x2_array - x1_array
    h_array = y2_array - y1_array

    offset = offset[indexs]
    confidence = confidence[indexs]
    landmarks = landmarks[indexs]

    # Offsets are fractions of the box width/height added to each corner.
    columns = [
        x1_array + w_array * offset[:, 0],
        y1_array + h_array * offset[:, 1],
        x2_array + w_array * offset[:, 2],
        y2_array + h_array * offset[:, 3],
        confidence[:, 0],
    ]
    # Landmarks are (x, y) pairs expressed relative to the box's top-left
    # corner, scaled by its width/height.
    for point in range(5):
        columns.append(x1_array + w_array * landmarks[:, 2 * point])
        columns.append(y1_array + h_array * landmarks[:, 2 * point + 1])
    boxes = np.stack(columns, axis=1)

    # Keep only boxes whose landmarks sit on the expected side of the box
    # edges: point 1 right of the left edge and below the top, point 2 left
    # of the right edge and below the top, point 3 fully inside, point 4
    # right of the left edge and above the bottom, point 5 left of the right
    # edge and above the bottom.
    left, top = boxes[:, 0], boxes[:, 1]
    right, bottom = boxes[:, 2], boxes[:, 3]
    plausible = (
        (boxes[:, 5] > left) & (boxes[:, 6] > top)
        & (boxes[:, 7] < right) & (boxes[:, 8] > top)
        & (boxes[:, 9] > left) & (boxes[:, 10] > top)
        & (boxes[:, 9] < right) & (boxes[:, 10] < bottom)
        & (boxes[:, 11] > left) & (boxes[:, 12] < bottom)
        & (boxes[:, 13] < right) & (boxes[:, 14] < bottom)
    )
    boxes = boxes[plausible]
    # Stacking an empty list of survivors used to raise here; report
    # "no detections" the same way as the other empty cases.
    if boxes.shape[0] == 0:
        return np.array([])

    return utils.NMS(boxes, 0.3, isMin=True)
def detectFace(self, img, threshold):
    """Detect faces with the P-Net, R-Net and O-Net cascade.

    Args:
        img: image array with three dimensions ``(h, w, c)``; it is
            normalized as ``(img - 127.5) / 127.5`` on a copy.
        threshold: indexable; ``threshold[0]``, ``threshold[1]`` and
            ``threshold[2]`` are passed to the P-, R- and O-Net decoders.

    Returns:
        The rectangles from ``utils.filter_face_48net``, or the empty
        intermediate result as soon as a stage leaves no rectangles.
    """
    # Normalization
    normalized = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, _ = normalized.shape

    def collect_crops(boxes, side):
        # Cut each candidate box out of the normalized image and resize it
        # to side x side; returns the crops as one array.
        crops = []
        for box in boxes:
            region = normalized[int(box[1]):int(box[3]),
                                int(box[0]):int(box[2])]
            crops.append(cv2.resize(region, (side, side)))
        return np.array(crops)

    # Scale factor of every pyramid level
    scales = utils.calculateScales(img)

    # ---- P-Net: rough face boxes --------------------------------------
    # Predict on each resized level; every prediction holds the classifier
    # output and the box regression output.
    pnet_results = []
    for factor in scales:
        resized = cv2.resize(
            normalized, (int(origin_w * factor), int(origin_h * factor)))
        pnet_results.append(
            self.Pnet.predict(resized.reshape(1, *resized.shape)))

    rectangles = []
    for factor, result in zip(scales, pnet_results):
        cls_prob = result[0][0][:, :, 1]  # face probability
        roi = result[1][0]  # matching box positions
        out_h, out_w = cls_prob.shape  # size of this level's output map
        # Decode into rectangles carrying coordinates and a score.
        rectangles.extend(
            utils.detect_face_12net(cls_prob, roi, max(out_h, out_w),
                                    1 / factor, origin_w, origin_h,
                                    threshold[0]))

    # Non-maximum suppression
    rectangles = utils.NMS(rectangles, 0.7)
    if len(rectangles) == 0:
        return rectangles

    # ---- R-Net: refine the boxes on 24x24 crops -----------------------
    rnet_result = self.Rnet.predict(collect_crops(rectangles, 24))
    cls_prob = np.array(rnet_result[0])
    roi_prob = np.array(rnet_result[1])
    rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                         origin_w, origin_h, threshold[1])
    if len(rectangles) == 0:
        return rectangles

    # ---- O-Net: final boxes on 48x48 crops ----------------------------
    onet_result = self.Onet.predict(collect_crops(rectangles, 48))
    cls_prob, roi_prob, pts_prob = (onet_result[0], onet_result[1],
                                    onet_result[2])
    return utils.filter_face_48net(cls_prob, roi_prob, pts_prob, rectangles,
                                   origin_w, origin_h, threshold[2])
def detectFace(self, img, threshold):
    """Three-stage face detection served through a Triton HTTP client.

    The P-Net, R-Net and O-Net models are queried as ``"pnet_tf"``,
    ``"rnet_tf"`` and ``"onet_tf"`` on ``self.triton_client``.

    Args:
        img: image array with three dimensions ``(h, w, c)``; it is
            normalized as ``(img - 127.5) / 127.5`` on a copy.
        threshold: indexable; ``threshold[0]``, ``threshold[1]`` and
            ``threshold[2]`` are passed to the P-, R- and O-Net decoders.

    Returns:
        The rectangles from ``utils.filter_face_48net``, or the empty
        intermediate result as soon as a stage leaves no rectangles.
    """

    def infer(model_name, input_name, output_names, batch):
        # Send one float32 batch to ``model_name`` and return the requested
        # outputs as numpy arrays, in the order of ``output_names``.
        infer_input = tritonclient.http.InferInput(input_name, batch.shape,
                                                   'FP32')
        infer_input.set_data_from_numpy(batch, binary_data=True)
        requested = [
            tritonclient.http.InferRequestedOutput(name, binary_data=True)
            for name in output_names
        ]
        response = self.triton_client.infer(model_name,
                                            inputs=[infer_input],
                                            outputs=requested)
        return [response.as_numpy(name) for name in output_names]

    def crop_batch(boxes, side):
        # Cut each box out of the normalized image, resize it to
        # side x side and pack everything into one float32 batch.
        patches = [
            cv2.resize(
                copy_img[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
                (side, side)) for box in boxes
        ]
        return np.array(patches).astype(np.float32)

    # Normalization
    copy_img = (img.copy() - 127.5) / 127.5
    origin_h, origin_w, _ = copy_img.shape

    # Scale factor of every pyramid level
    scales = utils.calculateScales(img)

    # ---- P-Net: rough face boxes --------------------------------------
    out = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(copy_img, (ws, hs))
        inputs = np.expand_dims(scale_img, 0).astype(np.float32)
        cls_map, roi_map = infer(
            "pnet_tf", self.Pnet_inputs[0],
            [self.Pnet_outputs[0], self.Pnet_outputs[1]], inputs)
        # Drop the batch dimension right away.
        out.append([cls_map[0], roi_map[0]])

    # Decode the classification and regression maps of every level.
    rectangles = []
    for scale, (cls_map, roi) in zip(scales, out):
        cls_prob = cls_map[:, :, 1]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        rectangles.extend(
            utils.detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                    origin_w, origin_h, threshold[0]))

    # Non-maximum suppression
    rectangles = np.array(utils.NMS(rectangles, 0.7))
    if len(rectangles) == 0:
        return rectangles

    # ---- R-Net: refine the boxes on 24x24 crops -----------------------
    cls_prob, roi_prob = infer(
        "rnet_tf", self.Rnet_inputs[0],
        [self.Rnet_outputs[0], self.Rnet_outputs[1]],
        crop_batch(rectangles, 24))
    rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                         origin_w, origin_h, threshold[1])
    if len(rectangles) == 0:
        return rectangles

    # ---- O-Net: final boxes on 48x48 crops ----------------------------
    cls_prob, roi_prob, pts_prob = infer(
        "onet_tf", self.Onet_inputs[0],
        [self.Onet_outputs[0], self.Onet_outputs[1], self.Onet_outputs[2]],
        crop_batch(rectangles, 48))
    rectangles = utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                         rectangles, origin_w, origin_h,
                                         threshold[2])
    return rectangles
def detect_face(img, thresholds):
    """Detect faces with the module-level ``Pnet``, ``Rnet`` and ``Onet``.

    The elapsed time of each stage is printed as it finishes.

    Args:
        img: image array with three dimensions ``(h, w, c)``; it is
            normalized as ``(img - 127.5) / 127.5`` on a copy.
        thresholds: indexable; ``thresholds[0]``, ``thresholds[1]`` and
            ``thresholds[2]`` are passed to the P-, R- and O-Net decoders.

    Returns:
        The rectangles from ``utils.onet_detect_face``, or the empty
        intermediate result as soon as a stage leaves no rectangles.
    """
    normalized = (img.copy() - 127.5) / 127.5
    orig_h, orig_w, _ = normalized.shape

    def resized_crops(boxes, side):
        # Cut each box out of the normalized image and resize it to
        # side x side; returns the crops as one array.
        return np.array([
            cv2.resize(
                normalized[int(box[1]):int(box[3]), int(box[0]):int(box[2])],
                (side, side)) for box in boxes
        ])

    # ---- P-Net prediction ---------------------------------------------
    # Scale factors used to build the image pyramid.
    scales = utils.calculate_scales(normalized)
    t0 = time.time()

    # Predict on every pyramid level first.
    level_preds = []
    for factor in scales:
        # OpenCV takes the target size as (width, height).
        resized = cv2.resize(
            normalized, (int(orig_w * factor), int(orig_h * factor)))
        # Add a leading batch axis: (1, scale_h, scale_w, channels).
        level_preds.append(Pnet.predict(resized.reshape(1, *resized.shape)))

    # Decode each level.
    rectangles_list = []
    for factor, pred in zip(scales, level_preds):
        face_prob = pred[0][0][:, :, 1]  # channel 1 of the confidence output
        box_offsets = pred[1][0]
        map_h, map_w = face_prob.shape
        longest_side = max(map_h, map_w)
        # The decoder is fed axis-swapped maps.
        face_prob = np.swapaxes(face_prob, 0, 1)
        box_offsets = np.swapaxes(box_offsets, 0, 2)
        rectangles_list.extend(
            utils.pnet_detect_face(face_prob, box_offsets, longest_side,
                                   1 / factor, orig_w, orig_h, thresholds[0]))

    rectangles_list = utils.NMS(rectangles_list, 0.7, 'iou')
    t1 = time.time()
    print("Time for P-Net is " + str(t1 - t0))
    if len(rectangles_list) == 0:
        return rectangles_list

    # ---- R-Net prediction ---------------------------------------------
    rnet_result = Rnet.predict(resized_crops(rectangles_list, 24))
    face_prob = np.array(rnet_result[0])
    box_offsets = np.array(rnet_result[1])
    rectangles_list = utils.rnet_detect_face(face_prob, box_offsets,
                                             rectangles_list, orig_w, orig_h,
                                             thresholds[1])
    t2 = time.time()
    print("Time for R-Net is " + str(t2 - t1))
    if len(rectangles_list) == 0:
        return rectangles_list

    # ---- O-Net prediction ---------------------------------------------
    onet_result = Onet.predict(resized_crops(rectangles_list, 48))
    face_prob = onet_result[0]
    box_offsets = onet_result[1]
    points = onet_result[2]
    rectangles = utils.onet_detect_face(face_prob, box_offsets, points,
                                        rectangles_list, orig_w, orig_h,
                                        thresholds[2])
    t3 = time.time()
    print("Time for O-Net is " + str(t3 - t2))
    return rectangles