def detect_pnet(self, im):
    """Collect face candidates by running PNet over an image pyramid.

    Parameters:
    -----------
    im: numpy array
        input image array; its shape is unpacked as (h, w, c)

    Returns:
    --------
    bboxes_align: numpy array
        candidate boxes after calibration, squaring and rounding of the
        first four columns, or None when no pyramid level produced a box
    """
    # Unpacking enforces a 3-D input, exactly as the rest of the pipeline expects.
    _height, _width, _channels = im.shape

    pnet_size = config.PNET_SIZE
    # Initial pyramid scale: network input size over the minimum face size.
    scale = float(pnet_size) / self.min_face_size
    scaled_im = self.resize_image(im, scale)
    scaled_h, scaled_w, _ = scaled_im.shape

    # One entry per pyramid level that produced at least one candidate.
    per_scale_bboxes = []

    while min(scaled_h, scaled_w) > pnet_size:
        batch = utils.convert_image_to_tensor(scaled_im).unsqueeze(0)
        batch = batch.to(self.device)
        cls_map, reg_map = self.pnet_detector(batch)

        cls_np = utils.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = utils.convert_chwTensor_to_hwcNumpy(reg_map.cpu())
        candidates = self.generate_bounding_box(
            cls_np, reg_np, scale, self.thresh[0])

        # Advance to the next pyramid level before filtering this one.
        scale *= self.scale_factor
        scaled_im = self.resize_image(im, scale)
        scaled_h, scaled_w, _ = scaled_im.shape

        if candidates.size != 0:
            survivors = utils.nms(candidates[:, :5], 0.5, 'Union')
            per_scale_bboxes.append(candidates[survivors])

    if not per_scale_bboxes:
        return None

    merged = np.vstack(per_scale_bboxes)

    # Second NMS pass across the detections of every scale.
    merged = merged[utils.nms(merged[:, 0:5], 0.7, 'Union')]

    # Columns 0-4 are handed over as the boxes (presumably 4 coordinates plus
    # a score - confirm against generate_bounding_box); columns 5+ are the
    # regression offsets used for calibration.
    calibrated = utils.calibrate_box(merged[:, 0:5], merged[:, 5:])
    calibrated = utils.convert_to_square(calibrated)
    calibrated[:, 0:4] = np.round(calibrated[:, 0:4])

    return calibrated
def __rnet_detect(self, image, pnet_boxes):
    """Re-score and refine P-Net proposals with R-Net.

    Parameters:
    ----------
    image:
        image object exposing ``crop`` and ``resize`` (used like a PIL image)
    pnet_boxes:
        proposals from P-Net; the first four columns of each row are read
        as x1, y1, x2, y2

    Returns:
    -------
    Whatever ``utils.nms`` returns for the rows ``[x1, y1, x2, y2, conf]``
    that pass the module-level ``r_cls`` threshold, using the module-level
    ``r_nms`` threshold.
    """
    # Expand every proposal to a square around its centre before cropping.
    _pnet_boxes = utils.convert_to_square(pnet_boxes)

    _img_dataset = []
    for _box in _pnet_boxes:
        _x1, _y1, _x2, _y2 = (int(v) for v in _box[:4])

        # The crop box is (left, upper, right, lower). The previous
        # (_x1, _x2, _y1, _y2) order cropped the wrong region.
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((24, 24))
        _img_dataset.append(self.__image_transform(img))

    if not _img_dataset:
        # torch.stack rejects an empty list. Go through the same path the
        # code takes when no proposal passes the confidence threshold.
        return utils.nms(np.array([]), r_nms)

    # Stack the per-crop tensors into one batch along a new axis 0.
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()

    _cls, _offset = self.rnet(img_dataset)

    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()

    # Row indices of the proposals whose confidence exceeds the threshold.
    idxs, _ = np.where(cls > r_cls)

    boxes = []
    for idx in idxs:
        _box = _pnet_boxes[idx]
        _x1, _y1, _x2, _y2 = (int(v) for v in _box[:4])

        # Width and height of the reference (square) box.
        ow = _x2 - _x1
        oh = _y2 - _y1

        # Offsets are relative to the reference box size.
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]

        boxes.append([x1, y1, x2, y2, cls[idx][0]])

    return utils.nms(np.array(boxes), r_nms)
def __onet_detect(self, image, rnet_boxes):
    """Re-score and refine R-Net proposals with O-Net (array-image variant).

    Parameters:
    ----------
    image:
        image array sliced as ``image[y1:y2, x1:x2]``
    rnet_boxes:
        proposals from R-Net, or None; the first four columns of each row
        are read as x1, y1, x2, y2

    Returns:
    -------
    None when ``rnet_boxes`` is None, otherwise whatever ``utils.nms``
    returns for the rows ``[x1, y1, x2, y2, conf]`` whose confidence
    exceeds 0.9.
    """
    if rnet_boxes is None:
        return

    _rnet_boxes = utils.convert_to_square(rnet_boxes)

    _img_dataset = []
    # Rows of _rnet_boxes that were actually cropped. The network output is
    # ordered like _img_dataset, so skipped boxes must be dropped from
    # _rnet_boxes too or scores get paired with the wrong boxes.
    used_rows = []
    for row, _box in enumerate(_rnet_boxes):
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])

        if _x1 <= 0 or _y1 <= 0 or _x2 <= 0 or _y2 <= 0 or _x1 > _x2 or _y1 > _y2:
            continue

        img = image[_y1:_y2, _x1:_x2]
        if img.shape[0] <= 0 or img.shape[1] <= 0:
            continue

        img = cv2.resize(img, (48, 48))
        img_data = self.transform(img) - 0.5
        _img_dataset.append(img_data)
        used_rows.append(row)

    if not _img_dataset:
        # torch.stack rejects an empty list. Hand NMS the same empty
        # (0, 5) input it receives when no crop passes the threshold.
        return utils.nms(np.empty((0, 5)), 0.3, isMin=True)

    _rnet_boxes = _rnet_boxes[used_rows]

    img_dataset = torch.stack(_img_dataset, dim=0)
    img_dataset = img_dataset.cuda()

    aa = time.process_time()
    _cls, _offset, _iou = self.onet(img_dataset)
    bb = time.process_time()
    print('Onet time:', bb - aa)

    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()

    # Row indices of the crops whose confidence exceeds 0.9.
    idxs, _ = np.where(cls > 0.9)

    selected = _rnet_boxes[idxs]
    _x1 = selected[:, 0]
    _y1 = selected[:, 1]
    _x2 = selected[:, 2]
    _y2 = selected[:, 3]

    bw = _x2 - _x1
    bh = _y2 - _y1

    # Offsets are relative to the reference box size.
    selected_offset = offset[idxs]
    x1 = _x1 + bw * selected_offset[:, 0]
    y1 = _y1 + bh * selected_offset[:, 1]
    x2 = _x2 + bw * selected_offset[:, 2]
    y2 = _y2 + bh * selected_offset[:, 3]
    conf = cls[idxs, 0]

    boxes = np.stack([x1, y1, x2, y2, conf], axis=1)

    return utils.nms(np.array(boxes), 0.3, isMin=True)
def __onet_detect(self, image, rnet_boxes):
    """Re-score and refine R-Net proposals with O-Net (PIL-style variant).

    Parameters:
    ----------
    image:
        image object exposing ``crop`` and ``resize`` (used like a PIL image)
    rnet_boxes:
        proposals from R-Net; the first four columns of each row are read
        as x1, y1, x2, y2

    Returns:
    -------
    Whatever ``utils.nms`` returns (called with ``isMin=True``) for the rows
    ``[x1, y1, x2, y2, conf]`` that pass the module-level ``o_cls``
    threshold, using the module-level ``o_nms`` threshold.
    """
    # Expand every R-Net box to a square before cropping.
    _rnet_boxes = utils.convert_to_square(rnet_boxes)

    _img_dataset = []
    for _box in _rnet_boxes:
        _x1, _y1, _x2, _y2 = (int(v) for v in _box[:4])

        # The crop box is (left, upper, right, lower). The previous
        # (_x1, _x2, _y1, _y2) order cropped the wrong region.
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((48, 48))
        _img_dataset.append(self.__image_transform(img))

    if not _img_dataset:
        # torch.stack rejects an empty list. Go through the same path the
        # code takes when no proposal passes the confidence threshold.
        return utils.nms(np.array([]), o_nms, isMin=True)

    # Stack the per-crop tensors into one batch along a new axis 0.
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        # Move the stacked tensor to the GPU. The old code called .cuda()
        # on the Python list and left the batch itself on the CPU.
        img_dataset = img_dataset.cuda()

    _cls, _offset = self.onet(img_dataset)

    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()

    # Row indices of the proposals whose confidence exceeds the threshold.
    idxs, _ = np.where(cls > o_cls)

    boxes = []
    for idx in idxs:
        # The R-Net box is the reference the offsets are applied to.
        _box = _rnet_boxes[idx]
        _x1, _y1, _x2, _y2 = (int(v) for v in _box[:4])

        ow = _x2 - _x1
        oh = _y2 - _y1

        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]

        # Four corner coordinates plus the confidence.
        boxes.append([x1, y1, x2, y2, cls[idx][0]])

    return utils.nms(np.array(boxes), o_nms, isMin=True)
def detect_onet(self, im, dets):
    """Get face candidates and landmarks using onet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of rnet, or None

    Returns:
    -------
    boxes_align: numpy array
        boxes after calibration, one row ``[x1, y1, x2, y2, score]`` per
        kept detection
    landmarks_align: numpy array
        landmarks after calibration, ten columns per kept detection
        (alternating x / y values)

    ``(None, None)`` is returned when ``dets`` is None, when no crop could be
    built, when nothing passes ``self.thresh[2]``, or when NMS keeps nothing.
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    dets = utils.convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)

    cropped_ims_tensors = []
    # Rows of dets that produced a crop. Network outputs are ordered like
    # cropped_ims_tensors, so dets must be filtered the same way.
    used_rows = []
    for i in range(dets.shape[0]):
        try:
            if tmph[i] > 0 and tmpw[i] > 0:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i], x[i]:ex[i], :]
                crop_im = cv2.resize(tmp, (48, 48))
                crop_im_tensor = convert_image_to_tensor(crop_im)
                cropped_ims_tensors.append(crop_im_tensor)
                used_rows.append(i)
        except ValueError as e:
            print(e)

    if not cropped_ims_tensors:
        # torch.stack rejects an empty list; nothing to detect.
        return None, None

    dets = dets[used_rows]

    feed_imgs = torch.stack(cropped_ims_tensors)
    feed_imgs = feed_imgs.to(self.device)

    cls_map, reg, landmark = self.onet_detector(feed_imgs)
    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    landmark = landmark.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[2])[0]
    if len(keep_inds) == 0:
        return None, None

    boxes = dets[keep_inds]
    cls = cls_map[keep_inds]
    reg = reg[keep_inds]
    landmark = landmark[keep_inds]

    keep = utils.nms(boxes, 0.7, mode="Minimum")
    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]
    keep_landmark = landmark[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0]
    bh = keep_boxes[:, 3] - keep_boxes[:, 1]

    # Regression offsets are relative to the box width / height.
    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack([align_topx,
                             align_topy,
                             align_bottomx,
                             align_bottomy,
                             keep_cls[:, 0]
                             ]).T

    # Even landmark columns are x values (scaled by bw from the box's left
    # edge); odd columns are y values (scaled by bh from the box's top edge).
    origins = (keep_boxes[:, 0], keep_boxes[:, 1])
    extents = (bw, bh)
    landmark_align = np.vstack([
        origins[k % 2] + keep_landmark[:, k] * extents[k % 2]
        for k in range(10)
    ]).T

    return boxes_align, landmark_align
def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of pnet, or None

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration, rows ``[x1, y1, x2, y2, score]``
    boxes_align: numpy array
        boxes after calibration, rows ``[x1, y1, x2, y2, score]``

    ``(None, None)`` is returned when ``dets`` is None, when no crop could be
    built, when nothing passes ``self.thresh[1]``, or when NMS keeps nothing.
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    dets = utils.convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)

    cropped_ims_tensors = []
    # Rows of dets that produced a crop. Network outputs are ordered like
    # cropped_ims_tensors, so dets must be filtered the same way.
    used_rows = []
    for i in range(dets.shape[0]):
        try:
            if tmph[i] > 0 and tmpw[i] > 0:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i], x[i]:ex[i], :]
                crop_im = cv2.resize(tmp, (24, 24))
                crop_im_tensor = convert_image_to_tensor(crop_im)
                cropped_ims_tensors.append(crop_im_tensor)
                used_rows.append(i)
        except ValueError as e:
            print('dy: {}, edy: {}, dx: {}, edx: {}'.format(dy[i], edy[i], dx[i], edx[i]))
            print('y: {}, ey: {}, x: {}, ex: {}'.format(y[i], ey[i], x[i], ex[i]))
            print(e)

    if not cropped_ims_tensors:
        # torch.stack rejects an empty list; nothing to detect.
        return None, None

    dets = dets[used_rows]

    feed_imgs = torch.stack(cropped_ims_tensors)
    feed_imgs = feed_imgs.to(self.device)

    cls_map, reg = self.rnet_detector(feed_imgs)
    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[1])[0]
    if len(keep_inds) == 0:
        return None, None

    boxes = dets[keep_inds]
    cls = cls_map[keep_inds]
    reg = reg[keep_inds]

    keep = utils.nms(boxes, 0.7)
    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0]
    bh = keep_boxes[:, 3] - keep_boxes[:, 1]

    # Uncalibrated boxes with the rnet score in the fifth column.
    boxes = np.vstack([keep_boxes[:, 0],
                       keep_boxes[:, 1],
                       keep_boxes[:, 2],
                       keep_boxes[:, 3],
                       keep_cls[:, 0]
                       ])

    # Regression offsets are relative to the box width / height.
    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack([align_topx,
                             align_topy,
                             align_bottomx,
                             align_bottomy,
                             keep_cls[:, 0]
                             ])

    boxes = boxes.T
    boxes_align = boxes_align.T

    return boxes, boxes_align
def detect_onet(self, im, bboxes):
    """Get face candidates using onet

    Parameters:
    ----------
    im: numpy array
        input image array
    bboxes: numpy array
        detection results of rnet, or None

    Returns:
    -------
    bboxes_align: numpy array
        bboxes after calibration, NMS and squaring; None when ``bboxes`` is
        None, when no crop could be built, when nothing passes
        ``self.thresh[2]``, or when NMS keeps nothing
    """
    net_size = config.ONET_SIZE
    h, w, c = im.shape

    if bboxes is None:
        return None

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = utils.correct_bboxes(bboxes, w, h)

    # Crop each rnet proposal out of the image.
    cropped_ims_tensors = []
    # Rows of bboxes that produced a crop. Network outputs are ordered like
    # cropped_ims_tensors, so bboxes must be filtered the same way.
    used_rows = []
    for i in range(bboxes.shape[0]):
        try:
            if tmph[i] > 0 and tmpw[i] > 0:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i], x[i]:ex[i], :]
                crop_im = cv2.resize(tmp, (net_size, net_size))
                crop_im_tensor = utils.convert_image_to_tensor(crop_im)
                cropped_ims_tensors.append(crop_im_tensor)
                used_rows.append(i)
        except ValueError as e:
            print(e)

    if not cropped_ims_tensors:
        # torch.stack rejects an empty list; nothing to detect.
        return None

    bboxes = bboxes[used_rows]

    feed_imgs = torch.stack(cropped_ims_tensors)
    feed_imgs = feed_imgs.to(self.device)

    cls, reg = self.onet_detector(feed_imgs)
    cls = cls.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    # Column 1 of the classifier output is used as the score.
    keep_inds = np.where(cls[:, 1] > self.thresh[2])[0]
    if len(keep_inds) == 0:
        return None

    keep_bboxes = bboxes[keep_inds]
    keep_cls = cls[keep_inds, :]
    keep_reg = reg[keep_inds]
    keep_bboxes[:, 4] = keep_cls[:, 1].reshape((-1, ))

    bboxes_align = utils.calibrate_box(keep_bboxes, keep_reg)
    keep = utils.nms(bboxes_align, 0.7, mode='Minimum')
    if len(keep) == 0:
        return None

    bboxes_align = bboxes_align[keep]
    bboxes_align = utils.convert_to_square(bboxes_align)
    return bboxes_align
def detect_rnet(self, im, bboxes):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    bboxes: numpy array
        detection results of pnet, or None

    Returns:
    -------
    bboxes_align: numpy array
        bboxes after NMS, calibration, squaring and rounding of the first
        four columns; None when ``bboxes`` is None, when no crop could be
        built, when nothing passes ``self.thresh[1]``, or when NMS keeps
        nothing
    """
    net_size = config.RNET_SIZE
    h, w, c = im.shape

    if bboxes is None:
        return None

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = utils.correct_bboxes(bboxes, w, h)

    # Crop each pnet proposal out of the image.
    cropped_ims_tensors = []
    # Rows of bboxes that produced a crop. Network outputs are ordered like
    # cropped_ims_tensors, so bboxes must be filtered the same way.
    used_rows = []
    for i in range(bboxes.shape[0]):
        try:
            if tmph[i] > 0 and tmpw[i] > 0:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i], x[i]:ex[i], :]
                crop_im = cv2.resize(tmp, (net_size, net_size))
                crop_im_tensor = utils.convert_image_to_tensor(crop_im)
                cropped_ims_tensors.append(crop_im_tensor)
                used_rows.append(i)
        except ValueError as e:
            print('dy: {}, edy: {}, dx: {}, edx: {}'.format(
                dy[i], edy[i], dx[i], edx[i]))
            print('y: {}, ey: {}, x: {}, ex: {}'.format(
                y[i], ey[i], x[i], ex[i]))
            print(e)

    if not cropped_ims_tensors:
        # torch.stack rejects an empty list; nothing to detect.
        return None

    bboxes = bboxes[used_rows]

    # All proposals go through the network as one batch; a very large number
    # of PNet proposals might therefore run out of memory.
    feed_imgs = torch.stack(cropped_ims_tensors)
    feed_imgs = feed_imgs.to(self.device)

    cls, reg = self.rnet_detector(feed_imgs)
    cls = cls.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    keep_inds = np.where(cls[:, 1] > self.thresh[1])[0]
    if len(keep_inds) == 0:
        return None

    keep_bboxes = bboxes[keep_inds]
    keep_reg = reg[keep_inds]
    # Column 1 of the classifier output is used as the score.
    keep_bboxes[:, 4] = cls[keep_inds, 1].reshape((-1, ))

    keep = utils.nms(keep_bboxes, 0.7)
    if len(keep) == 0:
        return None

    keep_bboxes = keep_bboxes[keep]
    keep_reg = keep_reg[keep]

    bboxes_align = utils.calibrate_box(keep_bboxes, keep_reg)
    bboxes_align = utils.convert_to_square(bboxes_align)
    bboxes_align[:, 0:4] = np.round(bboxes_align[:, 0:4])

    return bboxes_align
def get_rnet_sample_data(data_dir, anno_file, det_boxes_file, prefix_path):
    """Generate 24x24 negative / positive / part-face training crops.

    Each detection box is compared with the image's ground-truth boxes and
    the resized crop is saved as:
      * negative  - max IoU < 0.3 (at most 50 per image), label ``0``
      * positive  - max IoU >= 0.65, label ``1`` plus four box offsets
      * part face - max IoU >= 0.4,  label ``-1`` plus four box offsets
    Label lines go to ``pos_24.txt`` / ``neg_24.txt`` / ``part_24.txt`` under
    ``common.ANNO_STORE_DIR``.

    Parameters:
    ----------
    data_dir: str
        root directory under which ``24/negative``, ``24/positive`` and
        ``24/part`` are created
    anno_file: str
        annotation file; each line is ``image/path x1 y1 x2 y2 ...``
    det_boxes_file: str
        pickle file with one detection array per annotated image
    prefix_path: str
        prefix joined in front of every image path from ``anno_file``

    Raises:
    ------
    AssertionError
        when the number of detection entries differs from the number of
        annotation lines
    """
    image_size = 24

    neg_save_dir = os.path.join(data_dir, "24/negative")
    pos_save_dir = os.path.join(data_dir, "24/positive")
    part_save_dir = os.path.join(data_dir, "24/part")

    for dir_path in (neg_save_dir, pos_save_dir, part_save_dir):
        os.makedirs(dir_path, exist_ok=True)

    # Load ground truth from the annotation file.
    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    num_of_images = len(annotations)
    print("processing %d images in total" % num_of_images)

    im_idx_list = []
    gt_boxes_list = []
    for annotation in annotations:
        fields = annotation.strip().split(' ')
        im_idx_list.append(os.path.join(prefix_path, fields[0]))
        boxes = list(map(float, fields[1:]))
        gt_boxes_list.append(np.array(boxes, dtype=np.float32).reshape(-1, 4))

    save_path = common.ANNO_STORE_DIR
    os.makedirs(save_path, exist_ok=True)

    # Context managers guarantee the label files are flushed and closed even
    # if processing fails part-way through.
    with open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w') as f1, \
            open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w') as f2, \
            open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w') as f3:

        # NOTE(security): pickle.load can execute arbitrary code; only use
        # detection files produced by this project.
        with open(det_boxes_file, 'rb') as det_handle:
            det_boxes = pickle.load(det_handle)

        print(len(det_boxes), num_of_images)
        assert len(
            det_boxes) == num_of_images, "incorrect detections or ground truths"

        # Running indices of neg, pos and part faces, used as image names.
        n_idx = 0
        p_idx = 0
        d_idx = 0
        image_done = 0
        for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            image_done += 1
            if image_done % 100 == 0:
                print("%d images done" % image_done)

            if dets.shape[0] == 0:
                continue

            img = cv2.imread(im_idx)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                print("failed to read image %s, skipping" % im_idx)
                continue

            dets = convert_to_square(dets)
            dets[:, 0:4] = np.round(dets[:, 0:4])

            # Each image contributes at most 50 negative samples.
            cur_n_idx = 0
            for box in dets:
                x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
                width = x_right - x_left
                height = y_bottom - y_top

                # Ignore boxes that are too small or touch / exceed the border.
                if width < 20 or x_left <= 0 or y_top <= 0 or x_right >= img.shape[
                        1] or y_bottom >= img.shape[0]:
                    continue

                # IoU between the current box and all ground-truth boxes.
                Iou = IoU(box, gts)
                max_iou = np.max(Iou)

                cropped_im = img[y_top:y_bottom, x_left:x_right, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    # Negative: IoU with every ground truth is below 0.3.
                    cur_n_idx += 1
                    if cur_n_idx <= 50:
                        save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                        f2.write(save_file + ' 0\n')
                        cv2.imwrite(save_file, resized_im)
                        n_idx += 1
                    continue

                # Ground-truth box with the highest IoU.
                x1, y1, x2, y2 = gts[np.argmax(Iou)]

                # Bounding-box regression labels, relative to the box size.
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                if max_iou >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
def get_onet_sample_data(data_dir, anno_file, det_boxs_file, prefix):
    """Generate 48x48 negative / positive / part-face training crops.

    Each detection box is compared with the image's ground-truth boxes and
    the resized crop is saved as:
      * negative  - max IoU < 0.3 (at most 50 per image), label ``0``
      * positive  - max IoU >= 0.65, label ``1`` plus four box offsets
      * part face - max IoU >= 0.4,  label ``-1`` plus four box offsets
    Label lines go to ``pos_48.txt`` / ``neg_48.txt`` / ``part_48.txt`` under
    ``config.ANNO_STORE_DIR``.

    Parameters:
    ----------
    data_dir: str
        root directory under which ``48/negative``, ``48/positive`` and
        ``48/part`` are created
    anno_file: str
        annotation file; each line is ``image/path x1 y1 x2 y2 ...``
    det_boxs_file: str
        pickle file with one detection array per annotated image
    prefix: str
        prefix joined in front of every image path from ``anno_file``

    Raises:
    ------
    AssertionError
        when the number of detection entries differs from the number of
        annotation lines
    """
    image_size = 48

    neg_save_dir = os.path.join(data_dir, "48/negative")
    pos_save_dir = os.path.join(data_dir, "48/positive")
    part_save_dir = os.path.join(data_dir, "48/part")

    for dir_path in (neg_save_dir, pos_save_dir, part_save_dir):
        os.makedirs(dir_path, exist_ok=True)

    # Load ground truth from the annotation file.
    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    num_of_images = len(annotations)
    print("processing %d images in total" % num_of_images)

    im_idx_list = []
    gt_boxes_list = []
    for annotation in annotations:
        fields = annotation.strip().split(' ')
        im_idx_list.append(os.path.join(prefix, fields[0]))
        boxes = list(map(float, fields[1:]))
        gt_boxes_list.append(np.array(boxes, dtype=np.float32).reshape(-1, 4))

    save_path = config.ANNO_STORE_DIR
    os.makedirs(save_path, exist_ok=True)

    # Context managers guarantee the label files are flushed and closed even
    # if processing fails part-way through.
    with open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w') as f1, \
            open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w') as f2, \
            open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w') as f3:

        # NOTE(security): pickle.load can execute arbitrary code; only use
        # detection files produced by this project.
        with open(det_boxs_file, 'rb') as det_handle:
            det_boxes = pickle.load(det_handle)

        print(len(det_boxes), num_of_images)
        assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

        # Running indices of neg, pos and part faces, used as image names.
        n_idx = 0
        p_idx = 0
        d_idx = 0
        image_done = 0
        for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            image_done += 1
            if image_done % 100 == 0:
                print("%d images done" % image_done)

            if dets.shape[0] == 0:
                continue

            img = cv2.imread(im_idx)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                print("failed to read image %s, skipping" % im_idx)
                continue

            dets = convert_to_square(dets)
            dets[:, 0:4] = np.round(dets[:, 0:4])

            # Each image contributes at most 50 negative samples.
            cur_n_idx = 0
            for box in dets:
                x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
                width = x_right - x_left
                height = y_bottom - y_top

                # Ignore boxes that are too small or beyond the image border.
                if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
                    continue

                # IoU between the current box and all ground-truth boxes.
                Iou = IoU(box, gts)
                max_iou = np.max(Iou)

                cropped_im = img[y_top:y_bottom, x_left:x_right, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    # Negative: IoU with every ground truth is below 0.3.
                    cur_n_idx += 1
                    if cur_n_idx <= 50:
                        save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                        f2.write(save_file + ' 0\n')
                        cv2.imwrite(save_file, resized_im)
                        n_idx += 1
                    continue

                # Ground-truth box with the highest IoU.
                x1, y1, x2, y2 = gts[np.argmax(Iou)]

                # Bounding-box regression labels, relative to the box size.
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                if max_iou >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
if __name__ == '__main__': # 多张图片检测 image_path = r"test_image" for i in os.listdir(image_path): detector = Detector() with Image.open(os.path.join(image_path, i)) as im: # boxes = detector.detect(im) print("-" * 100) boxes = detector.detect(im) print("size:", im.size) imDraw = ImageDraw.Draw(im) # 保存网络输出的人脸(需要调整尺寸),为后面的人脸识别做准备 out_put_boxes = utils.convert_to_square(boxes) for _box in out_put_boxes: _x1 = int(_box[0]) _y1 = int(_box[1]) _x2 = int(_box[2]) _y2 = int(_box[3]) face_crop = im.crop((_x1, _y1, _x2, _y2)) face_crop.save(r"D:\picture\mtcnn\str{0}.jpg".format(_x1)) # 多个框,每循环一次框一个人脸 for box in boxes: x1 = int(box[0]) y1 = int(box[1]) x2 = int(box[2]) y2 = int(box[3])