def save_hard_example(net, data, save_path):
    """Mine hard neg/pos/part training samples for the next network stage.

    Matches the detections stored in ``<save_path>/detections.pkl`` against
    the ground-truth boxes in ``data``, crops and resizes each detection,
    and writes it into the neg/pos/part image folders plus label files.

    :param net: network id interpolated with %d into the label-file paths
                (looks like an int such as 24/48 — TODO confirm with caller)
    :param data: dict with 'images' (image paths) and 'bboxes' (gt boxes)
    :param save_path: directory holding detections.pkl
    """
    # load ground truth from annotation data
    # format per image: image/path plus [x1,y1,x2,y2] for each gt box
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)
    print("processing %d images in total" % num_of_images)

    # NOTE(review): `image_size`, `neg_dir`, `pos_dir`, `part_dir` and
    # `get_path` are not defined in this function — presumably module-level;
    # verify before running.
    neg_label_file = "../../DATA/no_LM%d/neg_%d.txt" % (net, image_size)
    pos_label_file = "../../DATA/no_LM%d/pos_%d.txt" % (net, image_size)
    part_label_file = "../../DATA/no_LM%d/part_%d.txt" % (net, image_size)

    # read detect result produced by the previous stage
    # FIX: load through a context manager — the original passed an anonymous
    # open() to pickle.load and never closed the handle.
    with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_fh:
        det_boxes = pickle.load(det_fh)
    print(len(det_boxes))
    print(num_of_images)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # FIX: open the three label files in a with-block so they are closed
    # even when an exception interrupts the loop (original leaked them).
    with open(neg_label_file, 'w') as neg_file, \
            open(pos_label_file, 'w') as pos_file, \
            open(part_label_file, 'w') as part_file:
        # index of neg, pos and part face, used as their image names
        n_idx = 0
        p_idx = 0
        d_idx = 0
        image_done = 0
        # im_idx_list: image paths; det_boxes: detections; gt_boxes_list: gts
        for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
            if image_done % 100 == 0:
                print("%d images done" % image_done)
            image_done += 1

            if dets.shape[0] == 0:
                continue
            img = cv2.imread(im_idx)
            # change to square so crops match the square network input
            dets = convert_to_square(dets)
            dets[:, 0:4] = np.round(dets[:, 0:4])
            neg_num = 0
            for box in dets:
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # ignore box that is too small or beyond image border
                if width < 20 or x_left < 0 or y_top < 0 \
                        or x_right > img.shape[1] - 1 \
                        or y_bottom > img.shape[0] - 1:
                    continue

                # IoU between current box and all gt boxes
                Iou = IoU(box, gts)
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                # negative: IoU with every gt below 0.3, capped at 60/image
                if np.max(Iou) < 0.3 and neg_num < 60:
                    save_file = get_path(neg_dir, "%s.jpg" % n_idx)
                    neg_file.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    neg_num += 1
                else:
                    # find gt_box with the highest iou
                    idx = np.argmax(Iou)
                    assigned_gt = gts[idx]
                    x1, y1, x2, y2 = assigned_gt

                    # bbox regression targets, normalized by the crop size
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)

                    # positive (IoU >= 0.65) or part face (0.4 <= IoU < 0.65);
                    # boxes over the neg cap with IoU < 0.4 fall through unsaved
                    if np.max(Iou) >= 0.65:
                        save_file = get_path(pos_dir, "%s.jpg" % p_idx)
                        pos_file.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                            offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    elif np.max(Iou) >= 0.4:
                        # NOTE(review): uses os.path.join while neg/pos use
                        # get_path — confirm the two agree before unifying
                        save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                        part_file.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                            offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1
def save_hard_example(net):
    """Generate RNet/ONet training samples from PNet region proposals.

    Reads the whole-training-set annotation file, loads the proposals from
    ``<save_path>/detections.pkl``, and splits every proposal into a
    negative / positive / part sample by its IoU with the ground truth.
    ``gen_imglist`` is expected to later combine them at a 3:1:1 ratio.

    :param net: 'rnet' (24x24 samples) or 'onet' (48x48 samples)
    :raises ValueError: for any other ``net`` value
    """
    # WIDER training images root (kept from the original; unused below)
    image_dir = "/Users/qiuxiaocong/Downloads/WIDER_train/images"
    # destination folders for the 24x24 RNet training images
    neg_save_dir = "/Users/qiuxiaocong/Downloads/mtcnn1/images/24/negative"
    pos_save_dir = "/Users/qiuxiaocong/Downloads/mtcnn1/images/24/positive"
    part_save_dir = "/Users/qiuxiaocong/Downloads/mtcnn1/images/24/part"

    # one annotation line per training image, produced by transform.py:
    # image/path x1 y1 x2 y2 [x1 y1 x2 y2 ...]
    anno_file = '/Users/qiuxiaocong/Downloads/mtcnn1/imglists/anno_demo.txt'
    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    if net == "rnet":
        image_size = 24
    elif net == "onet":
        image_size = 48
    else:
        # FIX: the original only handled "rnet"/"onet" and otherwise crashed
        # later with an unbound `image_size` NameError; fail fast instead.
        raise ValueError("net must be 'rnet' or 'onet', got %r" % (net,))

    im_idx_list = []    # absolute path of every training image
    gt_boxes_list = []  # all ground-truth boxes of every training image
    num_of_images = len(annotations)
    print("processing %d images in total" % num_of_images)
    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_idx = annotation[0]
        # every remaining field is a gt-box coordinate; 4 floats per box
        boxes = list(map(float, annotation[1:]))
        boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
        im_idx_list.append(im_idx)
        gt_boxes_list.append(boxes)

    # where this net's label files and detections.pkl live
    save_path = "/Users/qiuxiaocong/Downloads/mtcnn1/prepare_data/%s" % net

    # FIX: load the pickle via a context manager (the original passed an
    # anonymous open() to pickle.load and never closed the handle)
    with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_fh:
        det_boxes = pickle.load(det_fh)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # index of neg, pos and part face, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    # FIX: open the pos/neg/part label files in a with-block so they are
    # closed even when an exception interrupts the loop
    with open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w') as f1, \
            open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w') as f2, \
            open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w') as f3:
        for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            if image_done % 100 == 0:
                print("%d images done" % image_done)
            image_done += 1
            # PNet produced no region proposal for this image
            if dets.shape[0] == 0:
                continue
            # FIX: dropped the pointless os.path.join(im_idx) single-arg call
            img = cv2.imread(im_idx)
            # convert proposals to squares to match the square net input
            dets = convert_to_square(dets)
            dets[:, 0:4] = np.round(dets[:, 0:4])
            for box in dets:
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # ignore box that is too small or beyond image border
                if width < 20 or x_left < 0 or y_top < 0 \
                        or x_right > img.shape[1] - 1 \
                        or y_bottom > img.shape[0] - 1:
                    continue

                # IoU between current box and all gt boxes
                Iou = IoU(box, gts)
                # crop from the original image, resize to 24x24 (or 48x48)
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                # negative: IoU with every gt below 0.3
                if np.max(Iou) < 0.3:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write("%s/negative/%s" % (image_size, n_idx) + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                else:
                    # find gt_box with the highest iou
                    idx = np.argmax(Iou)
                    assigned_gt = gts[idx]
                    x1, y1, x2, y2 = assigned_gt

                    # bbox regression targets, normalized by the crop size
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)

                    # positive (IoU >= 0.65) or part face (0.4 <= IoU < 0.65)
                    if np.max(Iou) >= 0.65:
                        save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                        # FIX: removed a stray debug print() that emitted a
                        # blank line for every positive sample
                        f1.write("%s/positive/%s" % (image_size, p_idx) +
                                 ' 1 %.2f %.2f %.2f %.2f\n' % (
                                     offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    elif np.max(Iou) >= 0.4:
                        save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                        f3.write("%s/part/%s" % (image_size, d_idx) +
                                 ' -1 %.2f %.2f %.2f %.2f\n' % (
                                     offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1
def save_hard_example(data, test_mode, save_path):
    """Split detection results into hard pos/part/neg samples by IoU vs GT,
    for training the next network in the cascade.

    :param data: dict with 'images' (image paths) and 'bboxes' (gt boxes)
    :param test_mode: network that produced the detections ('PNet', 'RNet')
    :param save_path: directory where detections.pkl was saved
    :raises ValueError: for any other ``test_mode``
    """
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)
    print("共需处理图片数:", num_of_images)

    # samples mined from PNet detections train RNet; from RNet train ONet
    if test_mode == 'PNet':
        pos_label_file = path_config.rnet_pos_txt_path
        part_label_file = path_config.rnet_part_txt_path
        neg_label_file = path_config.rnet_neg_txt_path
    elif test_mode == 'RNet':
        pos_label_file = path_config.onet_pos_txt_path
        part_label_file = path_config.onet_part_txt_path
        neg_label_file = path_config.onet_neg_txt_path
    else:
        raise ValueError('网络类型(--test_mode)错误!')

    # FIX: load the pickle via a context manager — the original passed an
    # anonymous open() to pickle.load and never closed the file handle.
    with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_fh:
        det_boxes = pickle.load(det_fh)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # FIX: open the three label files in a with-block so they are closed
    # even when an exception interrupts the loop (original leaked them).
    # NOTE(review): `image_size`, `neg_dir`, `pos_dir`, `part_dir` are not
    # defined here — presumably module-level globals; verify before running.
    with open(pos_label_file, 'w') as pos_file, \
            open(part_label_file, 'w') as part_file, \
            open(neg_label_file, 'w') as neg_file:
        # counts of neg/pos/part samples, also used as their file names
        n_idx = 0
        p_idx = 0
        d_idx = 0
        image_done = 0        # images processed so far
        no_pos_image_num = 0  # cumulative images that yielded no positive
        old_p_idx = -1        # positive count after the previous image
        for im_idx, actual_detections, gts in zip(im_idx_list, det_boxes,
                                                  gt_boxes_list):
            gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
            # unchanged positive count means the previous image produced
            # no positive sample
            if old_p_idx == p_idx:
                no_pos_image_num += 1
            else:
                old_p_idx = p_idx
            if (image_done + 1) % 100 == 0:
                print("生成进度:{}/{}".format(image_done + 1, num_of_images))
                print("neg:{}, pos:{}, part:{}, no pos image:{}".format(
                    n_idx, p_idx, d_idx, no_pos_image_num))
            image_done += 1

            if actual_detections.shape[0] == 0:
                continue
            # classify each detection: IoU<0.3 neg, 0.4-0.65 part, >0.65 pos
            img = cv2.imread(im_idx)
            # square the detections, since the next net takes square input
            squared_detections = convert_to_square(actual_detections)
            squared_detections[:, 0:4] = np.round(squared_detections[:, 0:4])
            for index, box in enumerate(squared_detections):
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # skip boxes that are too small or out of the image
                if width < 20 or x_left < 0 or y_top < 0 \
                        or x_right > img.shape[1] - 1 \
                        or y_bottom > img.shape[0] - 1:
                    continue

                # IoU is computed against the ORIGINAL (un-squared) detection,
                # while the crop is taken from the squared box
                iou = IoU(actual_detections[index], gts)
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                # negative sample (IoU < 0.3)
                if np.max(iou) < 0.3:
                    save_file = os.path.join(neg_dir, "%s.jpg" % n_idx)
                    neg_file.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                # part (0.4 <= IoU < 0.65) or positive (IoU >= 0.65)
                else:
                    idx = np.argmax(iou)
                    assigned_gt = gts[idx]
                    x1, y1, x2, y2 = assigned_gt

                    # bounding-box regression targets
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)
                    if np.max(iou) >= 0.65:
                        save_file = os.path.join(pos_dir, "%s.jpg" % p_idx)
                        pos_file.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                            offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    elif np.max(iou) >= 0.4:
                        save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                        part_file.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                            offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1
def generate_landmark_data(landmark_truth_txt_path, images_dir, net, argument=False):
    """Generate landmark training samples (label = -2) for one network type.

    Crops each annotated face (squared, clipped to the image), normalizes its
    5 landmark points into [0, 1] crop coordinates, optionally augments with
    random shifts / mirror / +-5-degree rotations, and writes the crops plus
    a label file listing path, -2, and the 10 landmark values.

    :param landmark_truth_txt_path: txt with image path, bounding box and landmarks
    :param images_dir: image folder path
    :param net: network type, ('PNet', 'RNet', 'ONet')
    :param argument: whether to perform data augmentation
    :return: None (images and label file are written to disk)
    """
    # pick crop size and output locations per network type
    if net == "PNet":
        size = 12
        landmark_dir = path_config.pnet_landmark_dir
        net_data_root_dir = path_config.pnet_dir
        landmark_file = open(path_config.pnet_landmark_txt_path, 'w')
    elif net == "RNet":
        size = 24
        landmark_dir = path_config.rnet_landmark_dir
        net_data_root_dir = path_config.rnet_dir
        landmark_file = open(path_config.rnet_landmark_txt_path, 'w')
    elif net == "ONet":
        size = 48
        landmark_dir = path_config.onet_landmark_dir
        net_data_root_dir = path_config.onet_dir
        landmark_file = open(path_config.onet_landmark_txt_path, 'w')
    else:
        raise ValueError('网络类型(--net)错误!')
    # NOTE(review): os.mkdir fails if the parent is missing — confirm the
    # parent directories always exist (os.makedirs would be safer)
    if not os.path.exists(net_data_root_dir):
        os.mkdir(net_data_root_dir)
    if not os.path.exists(landmark_dir):
        os.mkdir(landmark_dir)

    # read landmark info: image path, bounding box, and landmarks
    data = get_landmark_data(landmark_truth_txt_path, images_dir)

    # generate landmark training data for every image
    landmark_idx = 0
    image_id = 0
    for (imgPath, bbox, landmarkGt) in data:
        # crops and their in-crop landmark coordinates for this image
        cropped_images = []
        cropped_landmarks = []
        img = cv2.imread(imgPath)
        assert (img is not None)
        image_height, image_width, _ = img.shape
        gt_box = np.array([[bbox.left, bbox.top, bbox.right, bbox.bottom]])
        square_gt_box = np.squeeze(convert_to_square(gt_box))
        # clip to the image while keeping the box square: shift the opposite
        # edge by the overshoot before clamping
        if square_gt_box[0] < 0:
            square_gt_box[2] -= square_gt_box[0]
            square_gt_box[0] = 0
        if square_gt_box[1] < 0:
            square_gt_box[3] -= square_gt_box[1]
            square_gt_box[1] = 0
        if square_gt_box[2] > image_width:
            square_gt_box[0] -= (square_gt_box[2] - image_width)
            square_gt_box[2] = image_width
        if square_gt_box[3] > image_height:
            square_gt_box[1] -= (square_gt_box[3] - image_height)
            square_gt_box[3] = image_height
        gt_box = np.squeeze(gt_box)

        # normalize landmarks into [0,1] coordinates of the squared crop:
        # ((x - left) / box width, (y - top) / box height)
        # assumes landmarkGt holds exactly 5 (x, y) points — TODO confirm
        landmark = np.zeros((5, 2))
        for index, one in enumerate(landmarkGt):
            landmark[index] = ((one[0] - square_gt_box[0]) / (square_gt_box[2] - square_gt_box[0]),
                               (one[1] - square_gt_box[1]) / (square_gt_box[3] - square_gt_box[1]))
        cropped_landmarks.append(landmark.reshape(10))
        # crop the squared target region and resize to the net input size
        cropped_object_image = img[square_gt_box[1]:square_gt_box[3] + 1,
                                   square_gt_box[0]:square_gt_box[2] + 1]
        cropped_object_image = cv2.resize(cropped_object_image, (size, size))
        cropped_images.append(cropped_object_image)
        # fresh buffer: the previous array is referenced (as a view) by the
        # reshape appended above, so it must not be mutated again
        landmark = np.zeros((5, 2))

        if argument:
            landmark_idx = landmark_idx + 1
            if landmark_idx % 100 == 0:
                sys.stdout.write("\r{}/{} images done ...".format(
                    landmark_idx, len(data)))
            # ground-truth coordinates, width and height
            x_truth_left, y_truth_top, x_truth_right, y_truth_bottom = gt_box
            width_truth = x_truth_right - x_truth_left + 1
            height_truth = y_truth_bottom - y_truth_top + 1
            # skip tiny or out-of-frame ground truths
            # NOTE(review): this `continue` also skips the save step below,
            # discarding the un-augmented crop too — confirm intended
            if max(width_truth, height_truth) < 20 or x_truth_left < 0 or y_truth_top < 0:
                continue

            # random shifts: keep up to 10 crops with IoU > 0.65, giving up
            # after 100 failed attempts
            shift_num = 0
            shift_try = 0
            while shift_num < 10 and shift_try < 100:
                # random square size in [0.8*min_side, 1.25*max_side)
                bbox_size = npr.randint(
                    int(min(width_truth, height_truth) * 0.8),
                    np.ceil(1.25 * max(width_truth, height_truth)))
                delta_x = npr.randint(int(-width_truth * 0.2),
                                      np.ceil(width_truth * 0.2))
                delta_y = npr.randint(int(-height_truth * 0.2),
                                      np.ceil(height_truth * 0.2))
                # center the shifted square on the jittered gt center, clamped to 0
                x_left_shift = int(
                    max(x_truth_left + width_truth / 2 - bbox_size / 2 + delta_x, 0))
                y_top_shift = int(
                    max(y_truth_top + height_truth / 2 - bbox_size / 2 + delta_y, 0))
                x_right_shift = x_left_shift + bbox_size
                y_bottom_shift = y_top_shift + bbox_size
                if x_right_shift > image_width or y_bottom_shift > image_height:
                    shift_try += 1
                    continue
                crop_box = np.array(
                    [x_left_shift, y_top_shift, x_right_shift, y_bottom_shift])
                # square-corrected IoU between the shifted crop and the gt
                iou = square_IoU(crop_box, np.expand_dims(gt_box, 0))
                if iou > 0.65:
                    shift_num += 1
                    cropped_im = img[y_top_shift:y_bottom_shift + 1,
                                     x_left_shift:x_right_shift + 1, :]
                    resized_im = cv2.resize(cropped_im, (size, size))
                    cropped_images.append(resized_im)
                    # normalize landmarks into the shifted crop
                    for index, one in enumerate(landmarkGt):
                        landmark[index] = ((one[0] - x_left_shift) / bbox_size,
                                           (one[1] - y_top_shift) / bbox_size)
                    cropped_landmarks.append(landmark.reshape(10))
                    # further augmentation on this crop; rebind the buffer so
                    # the appended view is not mutated next iteration
                    landmark = np.zeros((5, 2))
                    landmark_ = cropped_landmarks[-1].reshape(-1, 2)
                    # NOTE: shadows the `bbox` loop variable from here on
                    bbox = BBox([
                        x_left_shift, y_top_shift, x_right_shift, y_bottom_shift
                    ])
                    # mirror (50% chance)
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(
                            resized_im, landmark_)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        cropped_images.append(face_flipped)
                        cropped_landmarks.append(landmark_flipped.reshape(10))
                    # clockwise rotation (50% chance)
                    if random.choice([0, 1]) > 0:
                        face_rotated_by_alpha, landmark_rotated = \
                            rotate(img, bbox, bbox.reprojectLandmark(landmark_), 5)
                        # back to normalized crop coordinates
                        landmark_rotated = bbox.projectLandmark(
                            landmark_rotated)
                        face_rotated_by_alpha = cv2.resize(
                            face_rotated_by_alpha, (size, size))
                        cropped_images.append(face_rotated_by_alpha)
                        cropped_landmarks.append(landmark_rotated.reshape(10))
                        # and its mirror
                        face_flipped, landmark_flipped = flip(
                            face_rotated_by_alpha, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        cropped_images.append(face_flipped)
                        cropped_landmarks.append(landmark_flipped.reshape(10))
                    # counter-clockwise rotation (50% chance)
                    if random.choice([0, 1]) > 0:
                        face_rotated_by_alpha, landmark_rotated = \
                            rotate(img, bbox, bbox.reprojectLandmark(landmark_), -5)
                        landmark_rotated = bbox.projectLandmark(
                            landmark_rotated)
                        face_rotated_by_alpha = cv2.resize(
                            face_rotated_by_alpha, (size, size))
                        cropped_images.append(face_rotated_by_alpha)
                        cropped_landmarks.append(landmark_rotated.reshape(10))
                        # and its mirror
                        face_flipped, landmark_flipped = flip(
                            face_rotated_by_alpha, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        cropped_images.append(face_flipped)
                        cropped_landmarks.append(landmark_flipped.reshape(10))
                else:
                    shift_try += 1

        # save the landmark crops and their coordinates, discarding any
        # sample whose landmarks fall outside [0, 1]
        cropped_images, cropped_landmarks = np.asarray(
            cropped_images), np.asarray(cropped_landmarks)
        for i in range(len(cropped_images)):
            if np.any(cropped_landmarks[i] < 0):
                continue
            if np.any(cropped_landmarks[i] > 1):
                continue
            cv2.imwrite(os.path.join(landmark_dir, "%d.jpg" % image_id),
                        cropped_images[i])
            landmarks = map(str, list(cropped_landmarks[i]))
            # label line: <path> -2 <10 landmark values>
            landmark_file.write(
                os.path.join(landmark_dir, "%d.jpg" % image_id) + " -2 " +
                " ".join(landmarks) + "\n")
            image_id = image_id + 1
    landmark_file.close()
    return