def detect_onet(self, im, dets):
    """Get face candidates using onet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of rnet

    Returns:
    -------
    boxes_align: numpy array
        boxes after calibration
    landmarks_align: numpy array
        landmarks after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        # crop the (possibly clipped) box region out of the input image
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
            im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (48, 48))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    if self.onet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg, landmark = self.onet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    landmark = landmark.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[2])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        landmark = landmark[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.7, mode="Minimum")

    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]
    keep_landmark = landmark[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    align_landmark_topx = keep_boxes[:, 0]
    align_landmark_topy = keep_boxes[:, 1]

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0],
    ])
    boxes_align = boxes_align.T

    # landmark offsets are normalized to the uncalibrated box size
    landmark = np.vstack([
        align_landmark_topx + keep_landmark[:, 0] * bw,
        align_landmark_topy + keep_landmark[:, 1] * bh,
        align_landmark_topx + keep_landmark[:, 2] * bw,
        align_landmark_topy + keep_landmark[:, 3] * bh,
        align_landmark_topx + keep_landmark[:, 4] * bw,
        align_landmark_topy + keep_landmark[:, 5] * bh,
        align_landmark_topx + keep_landmark[:, 6] * bw,
        align_landmark_topy + keep_landmark[:, 7] * bh,
        align_landmark_topx + keep_landmark[:, 8] * bw,
        align_landmark_topy + keep_landmark[:, 9] * bh,
    ])
    landmark_align = landmark.T

    return boxes_align, landmark_align
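# The "Minimum" mode passed to utils.nms above is not defined in this file.
# The sketch below is a hedged reconstruction of the assumed semantics, based
# on the standard MTCNN formulation: "Union" divides the intersection by the
# union of the two areas (IoU), while "Minimum" divides by the smaller area,
# which also suppresses boxes nested inside larger ones.
def nms_sketch(boxes, overlap_threshold, mode="Union"):
    # boxes: (N, 5) array of [x1, y1, x2, y2, score]
    x1, y1, x2, y2, score = (boxes[:, i] for i in range(5))
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = score.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # overlap of the top-scoring box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        if mode == "Minimum":
            ovr = inter / np.minimum(area[i], area[order[1:]])
        else:  # "Union"
            ovr = inter / (area[i] + area[order[1:]] - inter)
        order = order[1:][ovr <= overlap_threshold]
    return keep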
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array
        input image array, one batch

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape
    net_size = 12

    # find the initial scale so the smallest detectable face maps to 12 px
    current_scale = float(net_size) / self.min_face_size
    im_resized = self.resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape

    # run the fully convolutional pnet over an image pyramid; a driver that
    # chains all three stages is sketched after this function
    all_boxes = list()
    while min(current_height, current_width) > net_size:
        feed_imgs = []
        image_tensor = image_tools.convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor)
        feed_imgs = torch.stack(feed_imgs)
        feed_imgs = Variable(feed_imgs)

        if self.pnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        # self.pnet_detector is a trained pnet torch model with a 12x12
        # receptive field; each output cell yields a score and a bounding box
        cls_map, reg = self.pnet_detector(feed_imgs)

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())
        # cls_map_np.shape = (1, n, m, 1), reg_np.shape = (1, n, m, 4)

        # boxes = [x1, y1, x2, y2, score, reg]
        boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                           current_scale, self.thresh[0])

        # generate the next level of the image pyramid
        current_scale *= self.scale_factor  # self.scale_factor = 0.709
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue

        # non-maximum suppression within this scale
        keep = utils.nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)

    if len(all_boxes) == 0:
        return None, None

    all_boxes = np.vstack(all_boxes)

    # merge the detections from all scales of the first stage
    keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
    all_boxes = all_boxes[keep]

    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    boxes = np.vstack([
        all_boxes[:, 0],
        all_boxes[:, 1],
        all_boxes[:, 2],
        all_boxes[:, 3],
        all_boxes[:, 4],
    ])
    boxes = boxes.T

    # all_boxes = [x1, y1, x2, y2, score, reg]; reg = [px1, py1, px2, py2]
    align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
    align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
    align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
    align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh

    # refine the boxes with the predicted offsets
    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        all_boxes[:, 4],
    ])
    boxes_align = boxes_align.T

    return boxes, boxes_align
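# For context, the three stages are typically chained as below. This driver
# is a hypothetical sketch (a method like detect_face is implied but not
# shown in this source); it would live on the same detector class and simply
# threads each stage's calibrated boxes into the next stage, handling the
# None returns used above.
def detect_face_sketch(self, im):
    boxes_align = np.array([])
    landmark_align = np.array([])

    # stage 1: pnet proposes coarse candidates over the image pyramid
    if self.pnet_detector:
        boxes, boxes_align = self.detect_pnet(im)
        if boxes_align is None:
            return np.array([]), np.array([])

    # stage 2: rnet filters and refines the pnet candidates
    if self.rnet_detector:
        boxes, boxes_align = self.detect_rnet(im, boxes_align)
        if boxes_align is None:
            return np.array([]), np.array([])

    # stage 3: onet produces the final boxes and landmarks
    if self.onet_detector:
        boxes_align, landmark_align = self.detect_onet(im, boxes_align)
        if boxes_align is None:
            return np.array([]), np.array([])

    return boxes_align, landmark_align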
def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of pnet

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    # convert pnet detections to square boxes and round the coordinates
    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]
    # (disabled helper: warn to retune self.rnet_detector.batch_size when
    # num_boxes / batch_size drifts above 3 or below 0.3)

    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
            im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (24, 24))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg = self.rnet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[1])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.7)

    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    boxes = np.vstack([
        keep_boxes[:, 0],
        keep_boxes[:, 1],
        keep_boxes[:, 2],
        keep_boxes[:, 3],
        keep_cls[:, 0],
    ])

    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0],
    ])

    boxes = boxes.T
    boxes_align = boxes_align.T

    return boxes, boxes_align
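# square_bbox is called at the top of every rnet/onet stage but is not shown
# in this file. Below is a minimal sketch of the assumed behavior: each box
# is expanded to a square with the same center and side max(w, h), so the
# crop keeps the aspect ratio expected by the fixed 24x24 / 48x48 inputs.
def square_bbox_sketch(bbox):
    square_bbox = bbox.copy()
    w = bbox[:, 2] - bbox[:, 0] + 1
    h = bbox[:, 3] - bbox[:, 1] + 1
    side = np.maximum(w, h)
    # shift the top-left corner so the square stays centered on the box
    square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - side * 0.5
    square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - side * 0.5
    square_bbox[:, 2] = square_bbox[:, 0] + side - 1
    square_bbox[:, 3] = square_bbox[:, 1] + side - 1
    return square_bbox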
def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array, input image array
    dets: numpy array, detection results of pnet

    Returns:
    -------
    boxes_align: numpy array, boxes after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [y1, y2, x1, x2, anchor_y1, anchor_y2,
     anchor_x1, anchor_x2, box_w, box_h] = self.boundary_check(dets, w, h)
    num_boxes = dets.shape[0]

    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp_img = np.zeros((box_h[i], box_w[i], 3), dtype=np.uint8)
        tmp_img[y1[i]:y2[i] + 1, x1[i]:x2[i] + 1, :] = \
            im[anchor_y1[i]:anchor_y2[i] + 1, anchor_x1[i]:anchor_x2[i] + 1, :]
        crop_im = cv2.resize(tmp_img, (24, 24))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    try:
        if self.rnet_detector.module.use_cuda:  # multi-GPU (DataParallel)
            feed_imgs = feed_imgs.cuda()
    except AttributeError:
        if self.rnet_detector.use_cuda:  # single GPU or CPU
            feed_imgs = feed_imgs.cuda()

    cls_map, reg, _ = self.rnet_detector(feed_imgs)  # CORE

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.args.prob_thres[1])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]  # NOTE :: det boxes come from pnet
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
    else:
        return None

    keep = utils.nms(boxes, 0.7)
    if len(keep) == 0:
        return None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    align_x1 = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_y1 = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_x2 = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_y2 = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack(
        [align_x1, align_y1, align_x2, align_y2, keep_cls[:, 0]]).T

    return boxes_align
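# boundary_check (the counterpart of pad in the other variants) is not shown
# in this file. A hedged sketch of the assumed contract: clip each square box
# against the image and report, per box, both the destination slice inside
# the zero-padded crop (y1..y2, x1..x2) and the source slice inside the image
# (anchor_y1..anchor_y2, anchor_x1..anchor_x2), plus the full box size.
def boundary_check_sketch(bboxes, w, h):
    box_w = (bboxes[:, 2] - bboxes[:, 0] + 1).astype(np.int32)
    box_h = (bboxes[:, 3] - bboxes[:, 1] + 1).astype(np.int32)
    num_box = bboxes.shape[0]

    # destination slice: where the visible part lands inside the crop
    x1 = np.zeros((num_box,), dtype=np.int32)
    y1 = np.zeros((num_box,), dtype=np.int32)
    x2 = box_w.copy() - 1
    y2 = box_h.copy() - 1

    # source slice: the part of the box that lies inside the image
    anchor_x1 = bboxes[:, 0].astype(np.int32)
    anchor_y1 = bboxes[:, 1].astype(np.int32)
    anchor_x2 = bboxes[:, 2].astype(np.int32)
    anchor_y2 = bboxes[:, 3].astype(np.int32)

    # clip boxes that spill over the right / bottom edge
    idx = anchor_x2 > w - 1
    x2[idx] = box_w[idx] + w - 2 - anchor_x2[idx]
    anchor_x2[idx] = w - 1
    idx = anchor_y2 > h - 1
    y2[idx] = box_h[idx] + h - 2 - anchor_y2[idx]
    anchor_y2[idx] = h - 1

    # clip boxes that spill over the left / top edge
    idx = anchor_x1 < 0
    x1[idx] = -anchor_x1[idx]
    anchor_x1[idx] = 0
    idx = anchor_y1 < 0
    y1[idx] = -anchor_y1[idx]
    anchor_y1[idx] = 0

    return [y1, y2, x1, x2, anchor_y1, anchor_y2,
            anchor_x1, anchor_x2, box_w, box_h]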
def detect_onet(self, im, dets):
    """Get face candidates using onet

    Parameters:
    ----------
    im: numpy array, input image array
    dets: numpy array, detection results of rnet

    Returns:
    -------
    boxes_align: numpy array, boxes after calibration
    landmarks_align: numpy array, landmarks after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [y1, y2, x1, x2, anchor_y1, anchor_y2,
     anchor_x1, anchor_x2, box_w, box_h] = self.boundary_check(dets, w, h)

    cropped_ims_tensors = []
    for i in range(dets.shape[0]):
        tmp_img = np.zeros((box_h[i], box_w[i], 3), dtype=np.uint8)
        tmp_img[y1[i]:y2[i] + 1, x1[i]:x2[i] + 1, :] = \
            im[anchor_y1[i]:anchor_y2[i] + 1, anchor_x1[i]:anchor_x2[i] + 1, :]
        crop_im = cv2.resize(tmp_img, (48, 48))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    try:
        if self.onet_detector.module.use_cuda:  # multi-GPU (DataParallel)
            feed_imgs = feed_imgs.cuda()
    except AttributeError:
        if self.onet_detector.use_cuda:  # single GPU or CPU
            feed_imgs = feed_imgs.cuda()

    # onet predicts score, box offsets, and landmarks
    cls_map, reg, landmark = self.onet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    landmark = landmark.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.args.prob_thres[2])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        landmark = landmark[keep_inds]
    else:
        return None, None

    bw = boxes[:, 2] - boxes[:, 0] + 1
    bh = boxes[:, 3] - boxes[:, 1] + 1

    align_x1 = boxes[:, 0] + reg[:, 0] * bw
    align_y1 = boxes[:, 1] + reg[:, 1] * bh
    align_x2 = boxes[:, 2] + reg[:, 2] * bw
    align_y2 = boxes[:, 3] + reg[:, 3] * bh

    boxes_align = np.vstack(
        [align_x1, align_y1, align_x2, align_y2, cls[:, 0]]).T

    # map the 68 normalized (x, y) landmark pairs back to image coordinates
    # TODO :: 68 <--> 5
    lmk_align = landmark.copy()
    x_idx = [2 * s for s in range(68)]
    y_idx = [2 * s + 1 for s in range(68)]
    for idx in range(lmk_align.shape[0]):
        lmk_align[idx, x_idx] = boxes[idx, 0] + lmk_align[idx, x_idx] * bw[idx]
        lmk_align[idx, y_idx] = boxes[idx, 1] + lmk_align[idx, y_idx] * bh[idx]

    keep = utils.nms(boxes_align, 0.7, mode='Minimum')
    if len(keep) == 0:
        return None, None

    boxes_align = boxes_align[keep]
    lmk_align = lmk_align[keep]

    return boxes_align, lmk_align
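# The per-row landmark loop above could also be expressed with numpy
# broadcasting, assuming the same x_idx/y_idx layout (a sketch, not a tested
# drop-in replacement):
#   lmk_align[:, x_idx] = boxes[:, 0:1] + lmk_align[:, x_idx] * bw[:, None]
#   lmk_align[:, y_idx] = boxes[:, 1:2] + lmk_align[:, y_idx] * bh[:, None]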
def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of pnet

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]
    if num_boxes == 0:
        return None, None
    # (disabled helper: warn to retune self.rnet_detector.batch_size when
    # num_boxes / batch_size drifts above 3 or below 0.3)

    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
            im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (24, 24))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors).float())

    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg = self.rnet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[1])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
    else:
        return None, None

    bw = boxes[:, 2] - boxes[:, 0] + 1
    bh = boxes[:, 3] - boxes[:, 1] + 1

    # refine the boxes and clip them to the image bounds
    align_topx = np.maximum(boxes[:, 0] + reg[:, 0] * bw, 0)
    align_topy = np.maximum(boxes[:, 1] + reg[:, 1] * bh, 0)
    align_bottomx = np.minimum(boxes[:, 2] + reg[:, 2] * bw, w)
    align_bottomy = np.minimum(boxes[:, 3] + reg[:, 3] * bh, h)

    boxes = np.vstack([
        boxes[:, 0],
        boxes[:, 1],
        boxes[:, 2],
        boxes[:, 3],
        cls[:, 0],
    ])
    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        cls[:, 0],
    ])
    boxes = boxes.T
    boxes_align = boxes_align.T

    # in this variant NMS is applied after calibration rather than before
    keep = utils.nms(boxes_align, 0.7)
    if len(keep) == 0:
        return None, None
    boxes_align = boxes_align[keep]

    return boxes, boxes_align
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array, input image array, one batch

    Returns:
    -------
    boxes_align: numpy array, boxes after calibration
    """
    h, w, c = im.shape
    net_size = 12
    current_scale = float(net_size) / self.min_face_size  # initial scale
    im_resized = self.resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape

    all_boxes = list()
    while min(current_height, current_width) > net_size:
        feed_imgs = []
        image_tensor = image_tools.convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor)
        feed_imgs = Variable(torch.stack(feed_imgs))

        try:
            if self.pnet_detector.module.use_cuda:  # multi-GPU (DataParallel)
                feed_imgs = feed_imgs.cuda()
        except AttributeError:
            if self.pnet_detector.use_cuda:  # single GPU or CPU
                feed_imgs = feed_imgs.cuda()

        # CORE; the landmark head is ignored at this stage
        cls_map, reg, _ = self.pnet_detector(feed_imgs)

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())

        # boxes = [x1, y1, x2, y2, score, reg]
        boxes = self.generate_bbox(cls_map_np[0, :, :], reg_np,
                                   current_scale, self.args.prob_thres[0])

        # generate the next level of the image pyramid
        current_scale *= self.scale_factor  # self.scale_factor = 0.709
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue

        # non-maximum suppression within this scale
        keep = utils.nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)

    if len(all_boxes) == 0:
        return None

    all_boxes = np.vstack(all_boxes)

    keep = utils.nms(all_boxes[:, :5], 0.7, 'Union')
    all_boxes = all_boxes[keep]

    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    # all_boxes = [x1, y1, x2, y2, score, reg]
    align_x1 = all_boxes[:, 0] + all_boxes[:, 5] * bw
    align_y1 = all_boxes[:, 1] + all_boxes[:, 6] * bh
    align_x2 = all_boxes[:, 2] + all_boxes[:, 7] * bw
    align_y2 = all_boxes[:, 3] + all_boxes[:, 8] * bh

    boxes_align = np.vstack([
        align_x1,
        align_y1,
        align_x2,
        align_y2,
        all_boxes[:, 4],
    ]).T

    return boxes_align
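# generate_bbox / generate_bounding_box is not defined in this file. The
# sketch below is a hedged reconstruction of the assumed mapping: pnet is
# fully convolutional with stride 2, so each score-map cell corresponds to a
# 12x12 window in the rescaled image, which is divided by the pyramid scale
# to land back in original-image coordinates.
def generate_bbox_sketch(cls_map, reg, scale, threshold, stride=2, cellsize=12):
    cls_map = np.squeeze(cls_map)            # tolerate a trailing channel axis
    t_index = np.where(cls_map > threshold)  # cells above the score threshold
    if t_index[0].size == 0:
        return np.array([])
    # per-cell regression offsets, shape (4, k), assuming reg is (1, n, m, 4)
    reg_sel = np.array([reg[0, t_index[0], t_index[1], i] for i in range(4)])
    score = cls_map[t_index]
    # map cell (row, col) back to a 12x12 window at this pyramid scale
    boxes = np.vstack([
        np.round(stride * t_index[1] / scale),               # x1
        np.round(stride * t_index[0] / scale),               # y1
        np.round((stride * t_index[1] + cellsize) / scale),  # x2
        np.round((stride * t_index[0] + cellsize) / scale),  # y2
        score,
        reg_sel,
    ])
    return boxes.T  # (k, 9) = [x1, y1, x2, y2, score, reg]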
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array
        input image array

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape
    net_size = 12
    current_scale = float(net_size) / self.min_face_size  # find initial scale
    im_resized = self.resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape

    # run the fully convolutional pnet over an image pyramid
    all_boxes = list()
    while min(current_height, current_width) > net_size:
        feed_imgs = []
        image_tensor = image_tools.convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor)
        feed_imgs = Variable(torch.stack(feed_imgs).float())

        if self.pnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg = self.pnet_detector(feed_imgs)

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())

        boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                           current_scale, self.thresh[0])

        current_scale *= self.scale_factor
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue

        keep = utils.nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)

    if len(all_boxes) == 0:
        return None, None

    all_boxes = np.vstack(all_boxes)

    # in this variant the cross-scale merge NMS is applied after calibration
    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    boxes = np.vstack([
        all_boxes[:, 0],
        all_boxes[:, 1],
        all_boxes[:, 2],
        all_boxes[:, 3],
        all_boxes[:, 4],
    ])
    boxes = boxes.T

    # refine the boxes and clip them to the image bounds
    align_topx = np.maximum(all_boxes[:, 0] + all_boxes[:, 5] * bw, 0)
    align_topy = np.maximum(all_boxes[:, 1] + all_boxes[:, 6] * bh, 0)
    align_bottomx = np.minimum(all_boxes[:, 2] + all_boxes[:, 7] * bw, w)
    align_bottomy = np.minimum(all_boxes[:, 3] + all_boxes[:, 8] * bh, h)

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        all_boxes[:, 4],
    ])
    boxes_align = boxes_align.T

    keep = utils.nms(boxes_align, 0.7, 'Union')
    boxes_align = boxes_align[keep]

    return boxes, boxes_align
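# A small, self-contained illustration of the pyramid schedule produced by
# the loop above. The defaults below (min_face_size=24, scale_factor=0.709)
# are assumptions matching the values referenced in this codebase; the loop
# stops once the shorter side of the rescaled image drops to net_size pixels.
def pyramid_scales_sketch(h, w, min_face_size=24, scale_factor=0.709,
                          net_size=12):
    scales = []
    scale = float(net_size) / min_face_size  # e.g. 12 / 24 = 0.5
    while min(h, w) * scale > net_size:
        scales.append(scale)
        scale *= scale_factor
    return scales

# e.g. pyramid_scales_sketch(480, 640) -> [0.5, 0.3545, 0.2513, ...]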
def train_rnet(model_store_path, end_epoch, imdb, batch_size, frequent=50,
               base_lr=0.01, use_cuda=True):
    if not os.path.exists(model_store_path):
        os.makedirs(model_store_path)

    lossfn = LossFn()
    net = RNet(is_train=True, use_cuda=use_cuda)
    net.train()
    if use_cuda:
        net.cuda()

    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)

    train_data = TrainImageReader(imdb, 24, batch_size, shuffle=True)

    for cur_epoch in range(1, end_epoch + 1):
        train_data.reset()
        for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data):
            im_tensor = [
                image_tools.convert_image_to_tensor(image[i, :, :, :])
                for i in range(image.shape[0])
            ]
            im_tensor = torch.stack(im_tensor)

            im_tensor = Variable(im_tensor)
            gt_label = Variable(torch.from_numpy(gt_label).float())
            gt_bbox = Variable(torch.from_numpy(gt_bbox).float())
            gt_landmark = Variable(torch.from_numpy(gt_landmark).float())

            if use_cuda:
                im_tensor = im_tensor.cuda()
                gt_label = gt_label.cuda()
                gt_bbox = gt_bbox.cuda()
                gt_landmark = gt_landmark.cuda()

            cls_pred, box_offset_pred = net(im_tensor)

            cls_loss = lossfn.cls_loss(gt_label, cls_pred)
            box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred)
            # the landmark loss is disabled for rnet in this setup
            all_loss = cls_loss * 1.0 + box_offset_loss * 0.5

            if batch_idx % frequent == 0:
                accuracy = compute_accuracy(cls_pred, gt_label)

                show1 = accuracy.data.cpu().numpy()
                show2 = cls_loss.data.cpu().numpy()
                show3 = box_offset_loss.data.cpu().numpy()
                show5 = all_loss.data.cpu().numpy()

                print("%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, "
                      "bbox loss: %s, all_loss: %s, lr:%s"
                      % (datetime.datetime.now(), cur_epoch, batch_idx,
                         show1, show2, show3, show5, base_lr))

            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()

        torch.save(net.state_dict(),
                   os.path.join(model_store_path, "rnet_epoch_%d.pt" % cur_epoch))
        torch.save(net,
                   os.path.join(model_store_path, "rnet_epoch_model_%d.pkl" % cur_epoch))
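# A hedged usage sketch for train_rnet. The annotation loader below is
# hypothetical (the repo's actual imdb construction is not shown here, and
# load_rnet_imdb plus the anno path are placeholders), but the keyword
# arguments match the signature above.
if __name__ == '__main__':
    imdb = load_rnet_imdb('anno_store/imglist_anno_24.txt')  # hypothetical loader
    train_rnet(model_store_path='./model_store',
               end_epoch=10,
               imdb=imdb,
               batch_size=512,
               frequent=50,
               base_lr=0.01,
               use_cuda=True)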
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array
        input image array, one batch

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape
    net_size = 12

    current_scale = float(net_size) / self.min_face_size  # find initial scale
    im_resized = self.resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape

    # run the fully convolutional pnet over an image pyramid
    all_boxes = list()
    while min(current_height, current_width) > net_size:
        feed_imgs = []
        image_tensor = image_tools.convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor)
        feed_imgs = torch.stack(feed_imgs)
        feed_imgs = Variable(feed_imgs)

        if self.pnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        # self.pnet_detector is a trained pnet torch model with a 12x12
        # receptive field; each output cell yields a score and a bounding box
        cls_map, reg = self.pnet_detector(feed_imgs)

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())
        # cls_map_np.shape = (1, n, m, 1), reg_np.shape = (1, n, m, 4)

        # boxes = [x1, y1, x2, y2, score, reg]
        boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                           current_scale, self.thresh[0])

        # generate the next level of the image pyramid
        current_scale *= self.scale_factor  # self.scale_factor = 0.709
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue

        # non-maximum suppression within this scale
        keep = utils.nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)

    if len(all_boxes) == 0:
        return None, None

    all_boxes = np.vstack(all_boxes)

    # merge the detections from all scales of the first stage
    keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
    all_boxes = all_boxes[keep]

    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    boxes = np.vstack([
        all_boxes[:, 0],
        all_boxes[:, 1],
        all_boxes[:, 2],
        all_boxes[:, 3],
        all_boxes[:, 4],
    ])
    boxes = boxes.T

    # all_boxes = [x1, y1, x2, y2, score, reg]; reg = [px1, py1, px2, py2]
    align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
    align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
    align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
    align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh

    # refine the boxes with the predicted offsets
    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        all_boxes[:, 4],
    ])
    boxes_align = boxes_align.T

    # remove invalid boxes: degenerate (side <= 3 px) or entirely outside
    valindex = [True for _ in range(boxes_align.shape[0])]
    for i in range(boxes_align.shape[0]):
        if boxes_align[i][2] - boxes_align[i][0] <= 3 or \
                boxes_align[i][3] - boxes_align[i][1] <= 3:
            valindex[i] = False
            print('pnet has one smaller than 3')
        elif boxes_align[i][2] < 1 or boxes_align[i][0] > w - 2 or \
                boxes_align[i][3] < 1 or boxes_align[i][1] > h - 2:
            valindex[i] = False
            print('pnet has one out')
    boxes_align = boxes_align[valindex, :]
    boxes = boxes[valindex, :]

    return boxes, boxes_align
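# The validity filter above can also be written without the Python loop. A
# sketch of a vectorized equivalent (same "side > 3 px" and inside-image
# conditions, returning a boolean mask instead of printing):
def filter_valid_boxes_sketch(boxes_align, w, h):
    bw = boxes_align[:, 2] - boxes_align[:, 0]
    bh = boxes_align[:, 3] - boxes_align[:, 1]
    big_enough = (bw > 3) & (bh > 3)
    inside = (boxes_align[:, 2] >= 1) & (boxes_align[:, 0] <= w - 2) & \
             (boxes_align[:, 3] >= 1) & (boxes_align[:, 1] <= h - 2)
    return big_enough & inside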
def train_pnet(model_store_path, end_epoch, imdb, batch_size, frequent=10,
               base_lr=0.01, lr_epoch_decay=[9], use_cuda=True, load=''):
    # build the per-epoch learning-rate list: drop by 10x at each decay epoch;
    # copy before appending so the default list is not mutated across calls
    lr_epoch_decay = list(lr_epoch_decay) + [end_epoch + 1]
    lr_list = np.zeros(end_epoch)
    lr_t = base_lr
    for i in range(len(lr_epoch_decay)):
        if i == 0:
            lr_list[0:lr_epoch_decay[i] - 1] = lr_t
        else:
            lr_list[lr_epoch_decay[i - 1] - 1:lr_epoch_decay[i] - 1] = lr_t
        lr_t *= 0.1

    if not os.path.exists(model_store_path):
        os.makedirs(model_store_path)

    lossfn = LossFn()
    net = PNet(is_train=True, use_cuda=use_cuda)
    if load != '':
        net.load_state_dict(torch.load(load))
        print('model loaded', load)
    net.train()
    if use_cuda:
        net.cuda()

    optimizer = torch.optim.Adam(net.parameters(), lr=lr_list[0])

    train_data = TrainImageReader(imdb, 12, batch_size, shuffle=True)

    for cur_epoch in range(1, end_epoch + 1):
        train_data.reset()  # shuffle
        for param in optimizer.param_groups:
            param['lr'] = lr_list[cur_epoch - 1]
        for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data):
            im_tensor = [
                image_tools.convert_image_to_tensor(image[i, :, :, :])
                for i in range(image.shape[0])
            ]
            im_tensor = torch.stack(im_tensor)

            im_tensor = Variable(im_tensor)
            gt_label = Variable(torch.from_numpy(gt_label).float())
            gt_bbox = Variable(torch.from_numpy(gt_bbox).float())

            if use_cuda:
                im_tensor = im_tensor.cuda()
                gt_label = gt_label.cuda()
                gt_bbox = gt_bbox.cuda()

            cls_pred, box_offset_pred = net(im_tensor)

            cls_loss = lossfn.cls_loss(gt_label, cls_pred)
            box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred)
            # pnet is trained without the landmark loss in this setup
            all_loss = cls_loss * 1.0 + box_offset_loss * 0.5

            if batch_idx % frequent == 0:
                accuracy = compute_accuracy(cls_pred, gt_label)

                show1 = accuracy.data.cpu().numpy()
                show2 = cls_loss.data.cpu().numpy()
                show3 = box_offset_loss.data.cpu().numpy()
                show5 = all_loss.data.cpu().numpy()

                print("%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, "
                      "bbox loss: %s, all_loss: %s, lr:%s"
                      % (datetime.datetime.now(), cur_epoch, batch_idx,
                         show1, show2, show3, show5, lr_list[cur_epoch - 1]))

            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()

        torch.save(net.state_dict(),
                   os.path.join(model_store_path, "pnet_epoch_%d.pt" % cur_epoch))
        torch.save(net,
                   os.path.join(model_store_path, "pnet_epoch_model_%d.pkl" % cur_epoch))
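# Worked example of the lr_list computed at the top of train_pnet: with
# base_lr=0.01, end_epoch=12, and lr_epoch_decay=[9], epochs 1-8 train at
# 0.01 and epochs 9-12 at 0.001. A standalone sketch reproducing the same
# arithmetic:
def make_lr_list_sketch(base_lr, end_epoch, lr_epoch_decay):
    boundaries = list(lr_epoch_decay) + [end_epoch + 1]
    lr_list = np.zeros(end_epoch)
    lr_t = base_lr
    prev = 1
    for b in boundaries:
        lr_list[prev - 1:b - 1] = lr_t
        lr_t *= 0.1
        prev = b
    return lr_list

# make_lr_list_sketch(0.01, 12, [9]) -> [0.01]*8 + [0.001]*4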
def train_pnet(model_store_path, end_epoch, imdb, batch_size, frequent=50,
               base_lr=0.01, use_cuda=True):
    if not os.path.exists(model_store_path):
        os.makedirs(model_store_path)

    lossfn = LossFn()
    net = PNet(is_train=True, use_cuda=use_cuda)
    # resume training from a saved checkpoint
    checkpoint = torch.load('model_store/pnet_epoch_4.pt')
    net.load_state_dict(checkpoint)
    net.train()
    if use_cuda:
        net.cuda()

    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)

    train_data = TrainImageReader(imdb, 12, batch_size, shuffle=True)

    for cur_epoch in range(1, end_epoch + 1):
        train_data.reset()
        accuracy_list = []
        cls_loss_list = []
        bbox_loss_list = []

        for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data):
            im_tensor = [
                image_tools.convert_image_to_tensor(image[i, :, :, :])
                for i in range(image.shape[0])
            ]
            im_tensor = torch.stack(im_tensor).float()

            im_tensor = Variable(im_tensor)
            gt_label = Variable(torch.from_numpy(gt_label).float())
            gt_bbox = Variable(torch.from_numpy(gt_bbox).float())

            if use_cuda:
                im_tensor = im_tensor.cuda()
                gt_label = gt_label.cuda()
                gt_bbox = gt_bbox.cuda()

            cls_pred, box_offset_pred = net(im_tensor)

            cls_loss = lossfn.cls_loss(gt_label, cls_pred)
            box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred)
            all_loss = cls_loss * 1.0 + box_offset_loss * 0.5

            if batch_idx % frequent == 0:
                accuracy = compute_accuracy(cls_pred, gt_label)

                show1 = accuracy.item()
                show2 = cls_loss.item()
                show3 = box_offset_loss.item()
                show5 = all_loss.item()

                print("%s : Epoch: %d, Step: %d, accuracy: %.4f, det loss: %.4f, "
                      "bbox loss: %.4f, all_loss: %.4f, lr:%s"
                      % (datetime.datetime.now(), cur_epoch, batch_idx,
                         show1, show2, show3, show5, base_lr))

                accuracy_list.append(accuracy)
                cls_loss_list.append(cls_loss)
                bbox_loss_list.append(box_offset_loss)

            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()

        # epoch-level averages of the sampled batch metrics
        accuracy_avg = torch.mean(torch.tensor(accuracy_list))
        cls_loss_avg = torch.mean(torch.tensor(cls_loss_list))
        bbox_loss_avg = torch.mean(torch.tensor(bbox_loss_list))

        show6 = accuracy_avg.item()
        show7 = cls_loss_avg.item()
        show8 = bbox_loss_avg.item()

        print("Epoch: %d, accuracy: %s, cls loss: %s, bbox loss: %s"
              % (cur_epoch, show6, show7, show8))

        torch.save(net.state_dict(),
                   os.path.join(model_store_path, "pnet_epoch_%d.pt" % cur_epoch))
        torch.save(net,
                   os.path.join(model_store_path, "pnet_epoch_model_%d.pkl" % cur_epoch))