def save_hard_example(data, test_mode, save_path):
    """Turn detector output into neg/part/pos training crops for the next net.

    Each detection is squared, cropped from its image, resized to
    ``image_size`` x ``image_size`` and classified by the best IoU between
    the raw (un-squared) detection and the image's ground-truth boxes:
    IoU < 0.3 is negative, IoU >= 0.65 is positive, 0.4 <= IoU < 0.65 is a
    part sample, and anything in [0.3, 0.4) is dropped.

    ``image_size``, ``neg_dir``, ``pos_dir``, ``part_dir``, ``path_config``,
    ``IoU`` and ``convert_to_square`` are not defined in this block; they are
    expected to be available at module level.

    :param data: mapping with ``'images'`` (image paths handed to
        ``cv2.imread``) and ``'bboxes'`` (per-image ground-truth boxes,
        reshaped here to ``(-1, 4)``)
    :param test_mode: network that produced the detections: ``'PNet'``
        (labels go to the RNet files) or ``'RNet'`` (labels go to the ONet
        files)
    :param save_path: directory that contains ``detections.pkl``
    :raises ValueError: if ``test_mode`` is neither ``'PNet'`` nor ``'RNet'``
    :raises AssertionError: if the number of detection entries differs from
        the number of images
    :return: None
    """
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)
    print("共需处理图片数:", num_of_images)

    # Label files of the *next* network in the cascade.
    if test_mode == 'PNet':
        pos_label_file = path_config.rnet_pos_txt_path
        part_label_file = path_config.rnet_part_txt_path
        neg_label_file = path_config.rnet_neg_txt_path
    elif test_mode == 'RNet':
        pos_label_file = path_config.onet_pos_txt_path
        part_label_file = path_config.onet_part_txt_path
        neg_label_file = path_config.onet_neg_txt_path
    else:
        raise ValueError('网络类型(--test_mode)错误!')

    # Load and validate the detections *before* the label files are opened,
    # so a missing/mismatched pickle no longer truncates existing labels.
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at detection files produced by this project.
    with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_file:
        det_boxes = pickle.load(det_file)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # Running sample counters; they double as the image file names.
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    no_pos_image_num = 0  # images that produced no positive sample so far
    old_p_idx = -1        # positive count seen at the start of the previous image

    with open(pos_label_file, 'w') as pos_file, \
            open(part_label_file, 'w') as part_file, \
            open(neg_label_file, 'w') as neg_file:
        for im_idx, actual_detections, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            gts = np.array(gts, dtype=np.float32).reshape(-1, 4)

            # An unchanged positive count means the previous image added none.
            if old_p_idx == p_idx:
                no_pos_image_num += 1
            else:
                old_p_idx = p_idx

            if (image_done + 1) % 100 == 0:
                print("生成进度:{}/{}".format(image_done + 1, num_of_images))
                print("neg:{}, pos:{}, part:{}, no pos image:{}".format(
                    n_idx, p_idx, d_idx, no_pos_image_num))
            image_done += 1

            if actual_detections.shape[0] == 0:
                continue

            img = cv2.imread(im_idx)
            # The next network takes square inputs, so crop squared boxes.
            squared_detections = convert_to_square(actual_detections)
            squared_detections[:, 0:4] = np.round(squared_detections[:, 0:4])

            for index, box in enumerate(squared_detections):
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # Skip boxes that are too small or reach outside the image.
                if (width < 20 or x_left < 0 or y_top < 0
                        or x_right > img.shape[1] - 1
                        or y_bottom > img.shape[0] - 1):
                    continue

                # IoU uses the raw detection; the crop uses the squared box.
                iou = IoU(actual_detections[index], gts)
                max_iou = np.max(iou)
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    save_file = os.path.join(neg_dir, "%s.jpg" % n_idx)
                    neg_file.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    continue

                # Regression targets relative to the best-matching ground truth.
                x1, y1, x2, y2 = gts[np.argmax(iou)]
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                if max_iou >= 0.65:
                    save_file = os.path.join(pos_dir, "%s.jpg" % p_idx)
                    pos_file.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                    part_file.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
neg_num = 0 #1---->50 # keep crop random parts, until have 50 negative examples # get 50 negative sample from every image while neg_num < 50: #neg_num's size [40,min(width, height) / 2],min_size:40 # size is a random number between 12 and min(width,height) size = npr.randint(12, min(width, height) / 2) #top_left coordinate nx = npr.randint(0, width - size) ny = npr.randint(0, height - size) #random crop crop_box = np.array([nx, ny, nx + size, ny + size]) #calculate iou Iou = IoU(crop_box, boxes) #crop a part from inital image cropped_im = img[ny:ny + size, nx:nx + size, :] #resize the cropped image to size 12*12 resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR) if np.max(Iou) < 0.3: # Iou with all gts must below 0.3 save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx) f2.write("../../DATA/12/negative/%s.jpg" % n_idx + ' 0\n') cv2.imwrite(save_file, resized_im) n_idx += 1 neg_num += 1
def GenerateData(ftxt, data_path, output_path, img_output_path, net, argument=False):
    '''Generate landmark training crops and the matching label file.

    For every annotated face the ground-truth crop is resized to the input
    size of ``net`` and its five landmarks are normalised by the box. When
    ``argument`` is true, up to ten randomly shifted crops per face (kept
    when their IoU with the ground truth exceeds 0.65) are added, each
    optionally mirrored and rotated by +5 / -5 degrees. Samples with any
    landmark coordinate <= 0 or >= 1 are discarded.

    NOTE(review): images and labels are only written inside the
    ``argument`` branch; with ``argument=False`` nothing is saved. This
    nesting is inferred from the upstream script -- confirm.

    Parameters
    ------------
        ftxt: path of the annotation file, passed to ``getDataFromTxt``
        data_path: dataset root directory, passed to ``getDataFromTxt``
        output_path: directory that receives ``landmark_<size>_aug.txt``
        img_output_path: directory that receives the generated images
        net: one of "PNet", "RNet", "ONet" (input size 12 / 24 / 48)
        argument: whether to apply data augmentation (and write output)

    Returns
    -------------
        ``(F_imgs, F_landmarks)`` of the last processed face (empty lists
        when there is no data), or ``None`` for an unknown ``net``.
    '''
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print('Net type error')
        return

    image_id = 0
    idx = 0
    # Defined up front so the final return works even when `data` is empty.
    F_imgs, F_landmarks = [], []
    # Each entry is (image path, bounding box, five landmark points).
    data = getDataFromTxt(ftxt, data_path=data_path)

    with open(join(output_path, "landmark_%s_aug.txt" % (size)), 'w') as f:
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)
            assert (img is not None)
            img_h, img_w, img_c = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])

            # Ground-truth face, resized to the network input size.
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # Landmarks relative to the box corner, divided by the box extent.
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = (
                    (one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                    (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))  # [x1, y1, x2, y2, ...]

            if not argument:
                continue

            idx = idx + 1
            if idx % 100 == 0:
                print(idx, "images done")
            x1, y1, x2, y2 = gt_box
            gt_w = x2 - x1 + 1
            gt_h = y2 - y1 + 1
            # Small faces and boxes starting outside the image are skipped
            # entirely (nothing is written for them).
            if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                continue

            # Random shift / rescale around the ground-truth centre.
            for _ in range(10):
                bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                        np.ceil(1.25 * max(gt_w, gt_h)))
                delta_x = npr.randint(-gt_w * 0.2, gt_w * 0.2)
                delta_y = npr.randint(-gt_h * 0.2, gt_h * 0.2)
                nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                nx2 = nx1 + bbox_size
                ny2 = ny1 + bbox_size
                if nx2 > img_w or ny2 > img_h:
                    continue

                crop_box = np.array([nx1, ny1, nx2, ny2])
                cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                resized_im = cv2.resize(cropped_im, (size, size))
                iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                if not iou > 0.65:
                    continue

                F_imgs.append(resized_im)
                # Landmarks normalised by the shifted square crop.
                landmark = np.zeros((5, 2))
                for index, one in enumerate(landmarkGt):
                    landmark[index] = ((one[0] - nx1) / bbox_size,
                                       (one[1] - ny1) / bbox_size)
                F_landmarks.append(landmark.reshape(10))
                landmark_ = F_landmarks[-1].reshape(-1, 2)
                crop_bbox = BBox([nx1, ny1, nx2, ny2])

                # Mirror, applied with probability 1/2.
                if random.choice([0, 1]) > 0:
                    face_flipped, landmark_flipped = flip(resized_im, landmark_)
                    face_flipped = cv2.resize(face_flipped, (size, size))
                    F_imgs.append(face_flipped)
                    F_landmarks.append(landmark_flipped.reshape(10))

                # Rotation by +5 and then -5 degrees, each with probability
                # 1/2, each followed by a mirrored copy.
                # NOTE(review): the original comments call +5 counter-clockwise
                # and -5 clockwise -- confirm against rotate().
                for alpha in (5, -5):
                    if random.choice([0, 1]) > 0:
                        face_rotated_by_alpha, landmark_rotated = rotate(
                            img, crop_bbox,
                            crop_bbox.reprojectLandmark(landmark_), alpha)
                        landmark_rotated = crop_bbox.projectLandmark(landmark_rotated)
                        face_rotated_by_alpha = cv2.resize(
                            face_rotated_by_alpha, (size, size))
                        F_imgs.append(face_rotated_by_alpha)
                        F_landmarks.append(landmark_rotated.reshape(10))

                        face_flipped, landmark_flipped = flip(
                            face_rotated_by_alpha, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for i in range(len(F_imgs)):
                # Drop the sample if any coordinate is <= 0 or >= 1.
                if np.any(F_landmarks[i] <= 0) or np.any(F_landmarks[i] >= 1):
                    continue
                cv2.imwrite(join(img_output_path, "%d.jpg" % (image_id)), F_imgs[i])
                landmarks = map(str, list(F_landmarks[i]))
                # The label must name the file written above; the original used
                # `dstdir`, which is not defined in this function.
                f.write(join(img_output_path, "%d.jpg" % (image_id))
                        + " -2 " + " ".join(landmarks) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks
def GenerateData(ftxt, data_path, net, argument=False):
    '''Generate landmark training crops and the matching label file.

    For every annotated face the ground-truth crop is resized to the input
    size of ``net`` and its five landmarks are normalised by the box. When
    ``argument`` is true, up to ten randomly shifted crops per face (kept
    when their IoU with the ground truth exceeds 0.65) are added, each
    optionally mirrored and rotated by +5 / -5 degrees. Samples with any
    landmark coordinate <= 0 or >= 1 are discarded.

    ``OUTPUT`` (label directory) and ``dstdir`` (image directory) are not
    defined in this block; they are expected to exist at module level.

    NOTE(review): images and labels are only written inside the
    ``argument`` branch; with ``argument=False`` nothing is saved. This
    nesting is inferred from the upstream script -- confirm.

    :param ftxt: name/path of the text file that contains image path,
        bounding box, and landmarks
    :param data_path: dataset root directory, passed to ``getDataFromTxt``
    :param net: one of "PNet", "RNet", "ONet" (input size 12 / 24 / 48)
    :param argument: apply augmentation (and write output) or not
    :return: ``(F_imgs, F_landmarks)`` of the last processed face (empty
        lists when there is no data), or ``None`` for an unknown ``net``
    '''
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print('Net type error')
        return

    image_id = 0
    idx = 0
    # Defined up front so the final return works even when `data` is empty.
    F_imgs, F_landmarks = [], []
    # Each entry is (image path, bounding box, five landmark points).
    data = getDataFromTxt(ftxt, data_path=data_path)

    with open(join(OUTPUT, "landmark_%s_aug.txt" % (size)), 'w') as f:
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)
            assert (img is not None)
            img_h, img_w, img_c = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])

            # Ground-truth face, resized to the network input size.
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # Landmarks relative to the box corner, divided by the box extent.
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = (
                    (one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                    (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if not argument:
                continue

            idx = idx + 1
            if idx % 100 == 0:
                print(idx, "images done")
            x1, y1, x2, y2 = gt_box
            gt_w = x2 - x1 + 1
            gt_h = y2 - y1 + 1
            # Small faces and boxes starting outside the image are skipped
            # entirely (nothing is written for them).
            if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                continue

            # Random shift / rescale around the ground-truth centre.
            for _ in range(10):
                bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                        np.ceil(1.25 * max(gt_w, gt_h)))
                delta_x = npr.randint(-gt_w * 0.2, gt_w * 0.2)
                delta_y = npr.randint(-gt_h * 0.2, gt_h * 0.2)
                nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                nx2 = nx1 + bbox_size
                ny2 = ny1 + bbox_size
                if nx2 > img_w or ny2 > img_h:
                    continue

                crop_box = np.array([nx1, ny1, nx2, ny2])
                cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                resized_im = cv2.resize(cropped_im, (size, size))
                iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                if not iou > 0.65:
                    continue

                F_imgs.append(resized_im)
                # Landmarks normalised by the shifted square crop.
                landmark = np.zeros((5, 2))
                for index, one in enumerate(landmarkGt):
                    landmark[index] = ((one[0] - nx1) / bbox_size,
                                       (one[1] - ny1) / bbox_size)
                F_landmarks.append(landmark.reshape(10))
                landmark_ = F_landmarks[-1].reshape(-1, 2)
                crop_bbox = BBox([nx1, ny1, nx2, ny2])

                # Mirror, applied with probability 1/2.
                if random.choice([0, 1]) > 0:
                    face_flipped, landmark_flipped = flip(resized_im, landmark_)
                    face_flipped = cv2.resize(face_flipped, (size, size))
                    F_imgs.append(face_flipped)
                    F_landmarks.append(landmark_flipped.reshape(10))

                # Rotation by +5 and then -5 degrees, each with probability
                # 1/2, each followed by a mirrored copy.
                # NOTE(review): the original comments call +5 counter-clockwise
                # and -5 clockwise -- confirm against rotate().
                for alpha in (5, -5):
                    if random.choice([0, 1]) > 0:
                        face_rotated_by_alpha, landmark_rotated = rotate(
                            img, crop_bbox,
                            crop_bbox.reprojectLandmark(landmark_), alpha)
                        landmark_rotated = crop_bbox.projectLandmark(landmark_rotated)
                        face_rotated_by_alpha = cv2.resize(
                            face_rotated_by_alpha, (size, size))
                        F_imgs.append(face_rotated_by_alpha)
                        F_landmarks.append(landmark_rotated.reshape(10))

                        face_flipped, landmark_flipped = flip(
                            face_rotated_by_alpha, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for i in range(len(F_imgs)):
                # Drop the sample if any coordinate is <= 0 or >= 1.
                if np.any(F_landmarks[i] <= 0) or np.any(F_landmarks[i] >= 1):
                    continue
                cv2.imwrite(join(dstdir, "%d.jpg" % (image_id)), F_imgs[i])
                landmarks = map(str, list(F_landmarks[i]))
                f.write(join(dstdir, "%d.jpg" % (image_id))
                        + " -2 " + " ".join(landmarks) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks
# Progress report every 100 images.
if idx % 100 == 0:
    print(idx, "images done")
# Height / width / channel count of the input image.
height, width, channel = img.shape
neg_num = 0
# Draw 50 negative samples per image. The crops are placed at random, so a
# crop may have no overlap with any box at all.
while neg_num < 50:
    # Side length of the crop, drawn from [12, min(width, height) / 2).
    size = npr.randint(12, min(width, height) / 2)
    # Top-left corner, chosen so the crop stays inside the image.
    nx = npr.randint(0, width - size)
    ny = npr.randint(0, height - size)
    # Box coordinates only (x1, y1, x2, y2); the pixels are cut out below.
    crop_box = np.array([nx, ny, nx + size, ny + size])
    # IoU of the crop against every box in `boxes`.
    Iou = IoU(crop_box, boxes)
    # Cut the crop out of the source image.
    cropped_im = img[ny:ny + size, nx:nx + size, :]
    # Resize the crop to 12x12 (the original comment says this is the P-Net input).
    resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
    # Only crops whose IoU with every box is below 0.3 count as negatives.
    if np.max(Iou) < 0.3:
        save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
        f2.write("12/negative/%s" % n_idx + ' 0\n')
        cv2.imwrite(save_file, resized_im)
        n_idx += 1  # running count of negatives written so far
        neg_num += 1  # negatives taken from the current image
def save_hard_example(net):
    """Turn saved detections into neg/pos/part training crops for ``net``.

    Reads the annotation list and ``detections.pkl`` from hard-coded
    locations, squares every detection, crops and resizes it to the input
    size of ``net`` and classifies it by its best IoU with the image's
    ground-truth boxes: IoU < 0.3 is negative, IoU >= 0.65 is positive,
    0.4 <= IoU < 0.65 is a part face, anything in [0.3, 0.4) is dropped.

    :param net: "rnet" (24x24 crops) or "onet" (48x48 crops)
    :raises ValueError: if ``net`` is neither "rnet" nor "onet" (previously
        this surfaced later as a NameError on ``image_size``)
    :raises AssertionError: if the number of detection entries differs from
        the number of annotated images
    :return: None
    """
    if net == "rnet":
        image_size = 24
    elif net == "onet":
        image_size = 48
    else:
        raise ValueError("unsupported net %r, expected 'rnet' or 'onet'" % (net,))

    # Output image directories follow the crop size so that they agree with
    # the "<size>/<kind>/<index>" entries written to the label files. The
    # original always used the "24" directories, even for onet.
    images_root = "/Users/qiuxiaocong/Downloads/mtcnn1/images/%d" % image_size
    neg_save_dir = images_root + "/negative"
    pos_save_dir = images_root + "/positive"
    part_save_dir = images_root + "/part"

    # One annotation line per image: "image/path x1 y1 x2 y2 [x1 y1 x2 y2 ...]".
    anno_file = '/Users/qiuxiaocong/Downloads/mtcnn1/imglists/anno_demo.txt'
    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    im_idx_list = list()    # image path of every annotated image
    gt_boxes_list = list()  # ground-truth boxes of every annotated image
    num_of_images = len(annotations)
    print("processing %d images in total" % num_of_images)
    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_idx = annotation[0]
        boxes = list(map(float, annotation[1:]))
        boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
        im_idx_list.append(im_idx)
        gt_boxes_list.append(boxes)

    # Directory holding detections.pkl and receiving the label files.
    save_path = "/Users/qiuxiaocong/Downloads/mtcnn1/prepare_data/%s" % net

    # Load and validate the detections before the label files are truncated.
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at detection files produced by this project.
    with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_file:
        det_boxes = pickle.load(det_file)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # Running counters of neg / pos / part samples, used as image names.
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0

    with open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w') as f1, \
            open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w') as f2, \
            open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w') as f3:
        for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            if image_done % 100 == 0:
                print("%d images done" % image_done)
            image_done += 1

            # No detections were produced for this image.
            if dets.shape[0] == 0:
                continue
            img = cv2.imread(im_idx)

            # Square the boxes and round the coordinates; each row still
            # unpacks into four coordinates plus one trailing value.
            dets = convert_to_square(dets)
            dets[:, 0:4] = np.round(dets[:, 0:4])

            for box in dets:
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # Ignore boxes that are too small or beyond the image border.
                if (width < 20 or x_left < 0 or y_top < 0
                        or x_right > img.shape[1] - 1
                        or y_bottom > img.shape[0] - 1):
                    continue

                # IoU between the current box and all ground-truth boxes.
                Iou = IoU(box, gts)
                max_iou = np.max(Iou)

                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write("%s/negative/%s" % (image_size, n_idx) + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    continue

                # Regression targets relative to the best-matching ground truth.
                x1, y1, x2, y2 = gts[np.argmax(Iou)]
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                if max_iou >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write("%s/positive/%s" % (image_size, p_idx)
                             + ' 1 %.2f %.2f %.2f %.2f\n' % (
                                 offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write("%s/part/%s" % (image_size, d_idx)
                             + ' -1 %.2f %.2f %.2f %.2f\n' % (
                                 offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
def test(self, threshold, model_iter):
    """Score the detector on the test set against one ground-truth box per image.

    An image counts as a hit when at least one of its detections has an IoU
    with the ground truth above ``threshold``. The reported numbers are:

    * precision: mean, over hit images, of the fraction of that image's
      detections whose IoU exceeds ``threshold``
    * acc-pos: mean, over hit images, of the mean score (column 4) of the
      detections above ``threshold``
    * acc-all: mean, over hit images, of the mean score of all detections
    * recall: hit images divided by ``self.test_loader.size``

    When no image is a hit, the three averages are reported as 0.0 instead
    of raising ``ZeroDivisionError``.

    :param threshold: IoU threshold above which a detection counts as correct
    :param model_iter: model iteration tag; only referenced by the disabled
        visualisation code below
    :return: tuple ``(precision, acc_pos, acc_all, recall)``
    """
    all_boxes, landmark = self.detector.detect_images(self.test_loader)
    hard_samples = list()  # images with detections but none above threshold
    recall = 0     # number of hit images; turned into a ratio at the end
    acc_pos = 0
    acc_all = 0
    precision = 0

    # Result directory next to the image directory (only the disabled
    # visualisation writes into it, but it is still created here).
    save_path = os.path.join(os.path.dirname(self.images_path[0]), '..', 'result')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    for index, image_path in enumerate(self.images_path):
        image_name = os.path.basename(image_path)
        ground_truth = self.ground_map[image_name]
        if len(all_boxes[index]) == 0:
            print('图片{}检测不到车牌'.format(image_name))
            continue

        # IoU of every detection with the single ground-truth box.
        iou = np.ones((len(all_boxes[index]),))
        gt_boxes = np.array([ground_truth])
        for j, box in enumerate(all_boxes[index]):
            iou[j] = IoU(box, gt_boxes)

        # Disabled visualisation code, kept as an inert string literal.
        '''
        # 画图
        im = cv2.imread(image_path)
        for j, box in enumerate(all_boxes[index]):
            # if image_name == '20180929172716720_23609_dqp001_甘A5T470.jpg':
            #     pdb.set_trace()
            # 绘制iou大于阈值的pos框
            if iou[j] > threshold:
                cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 0, 0), 2)
                cv2.putText(im, '{:s}|{:.2f}|{:.2f}'.format('p', box[4], iou[j]), (int(box[0]), int(box[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0))
                for k in range(5):
                    cv2.circle(im, (landmark[index][j][2*k], landmark[index][j][2*k+1]), 1, (0, 0, 255), 4)
        # 绘制ground truth
        cv2.rectangle(im, (int(ground_truth[0]), int(ground_truth[1])), (int(ground_truth[2]), int(ground_truth[3])), (0, 255, 0), 2)
        cv2.imwrite(os.path.join(save_path, os.path.splitext(image_name)[0] + '_' + model_iter + '.jpg'), im)
        print('IoU:\n', iou)
        print('average iou = {}'.format(sum(iou) / sum(iou != 0)))
        '''

        if iou.max() > threshold:
            above = iou > threshold
            recall += 1
            acc_pos += np.mean(all_boxes[index][above, 4])
            acc_all += np.mean(all_boxes[index][:, 4])
            precision += len(all_boxes[index][above, 4]) / len(all_boxes[index][:, 4])
        else:
            hard_samples.append(image_path)

    hits = recall
    if hits:
        precision /= hits
        acc_pos /= hits
        acc_all /= hits
    else:
        # No image passed the threshold: there is nothing to average.
        precision = acc_pos = acc_all = 0.0
    recall = hits / self.test_loader.size

    print('IoU threshold={}:'.format(threshold),
          'precision={},'.format(precision),
          ' acc-pos={},'.format(acc_pos),
          'acc-all={}'.format(acc_all),
          'recall={}'.format(recall))
    return precision, acc_pos, acc_all, recall
min_face_size=min_face_size, stride=stride, threshold=thresh) gt_imdb = [] path = "E:/Document/Datasets/Wider Face/WIDER_val/images" gt_data = load_wider_face_gt_boxes("wider_face_val_bbx_gt.txt") for item in gt_data.keys(): gt_imdb.append(os.path.join(path, item)) test_data = TestLoader(gt_imdb) all_boxes, landmarks = mtcnn_detector.detect_face(test_data) count = 0 scores = [] recall_rate = 0 for imagepath in gt_imdb: for bbox in all_boxes[count]: rate = len(all_boxes) score = 0 for gt_boxes in gt_data[imagepath]: iou = IoU(bbox, gt_boxes) if score > iou: score = iou count = count + 1
def save_12net_data(data_dir):
    """Generate the 12x12 negative / positive / part training set.

    For every annotated image this writes 50 random negative crops, then
    for every sufficiently large ground-truth box up to 5 further negatives
    drawn near the box and up to 20 shifted crops that become positives
    (IoU >= 0.65) or part faces (0.4 <= IoU < 0.65). Crops are stored under
    ``<data_dir>/12/{negative,positive,part}`` and listed in
    ``<data_dir>/12/{neg,pos,part}_12.txt``.

    The annotation file is read before any output directory or label file
    is created, so a missing annotation file no longer leaves empty or
    truncated label files behind.

    :param data_dir: dataset root; images are read from
        ``<data_dir>/WIDER_train/images/<name>.jpg``
    :raises FileNotFoundError: if ``./prepare_data/wider_face_train.txt``
        cannot be opened
    :return: None
    """
    anno_file = "./prepare_data/wider_face_train.txt"
    im_dir = data_dir + "/WIDER_train/images"
    save_dir = data_dir + "/12"
    pos_save_dir = data_dir + "/12/positive"
    part_save_dir = data_dir + "/12/part"
    neg_save_dir = data_dir + '/12/negative'

    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print("%d pics in total" % num)

    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        os.makedirs(directory, exist_ok=True)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # part ("don't care")
    idx = 0    # images processed

    with open(os.path.join(save_dir, 'pos_12.txt'), 'w') as f1, \
            open(os.path.join(save_dir, 'neg_12.txt'), 'w') as f2, \
            open(os.path.join(save_dir, 'part_12.txt'), 'w') as f3:
        for annotation in annotations:
            annotation = annotation.strip().split(' ')
            im_path = annotation[0]
            # Remaining fields are x1 y1 x2 y2 for each ground-truth box.
            bbox = list(map(float, annotation[1:]))
            boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            img = cv2.imread(os.path.join(im_dir, im_path + '.jpg'))
            height, width, channel = img.shape
            idx += 1

            # 50 random negatives per image.
            neg_num = 0
            while neg_num < 50:
                # Side length drawn from [12, min(width, height) / 2).
                size = npr.randint(12, min(width, height) / 2)
                nx = npr.randint(0, width - size)
                ny = npr.randint(0, height - size)
                crop_box = np.array([nx, ny, nx + size, ny + size])
                Iou = IoU(crop_box, boxes)
                cropped_im = img[ny:ny + size, nx:nx + size, :]
                resized_im = cv2.resize(cropped_im, (12, 12),
                                        interpolation=cv2.INTER_LINEAR)
                # IoU with every ground-truth box must stay below 0.3.
                if np.max(Iou) < 0.3:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(save_dir + "/negative/%s.jpg" % n_idx + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    neg_num += 1

            for box in boxes:
                # box is (x_left, y_top, x_right, y_bottom).
                x1, y1, x2, y2 = box
                w = x2 - x1 + 1
                h = y2 - y1 + 1

                # Ignore small faces and boxes whose top-left corner lies
                # outside the image; such annotations may be inaccurate.
                if max(w, h) < 20 or x1 < 0 or y1 < 0:
                    continue

                # Up to 5 more negatives that overlap the box only slightly.
                for i in range(5):
                    size = npr.randint(12, min(width, height) / 2)
                    # Offsets of (x1, y1); the upper bounds w / h keep the
                    # crop's corner from starting beyond the box.
                    delta_x = npr.randint(max(-size, -x1), w)
                    delta_y = npr.randint(max(-size, -y1), h)
                    nx1 = int(max(0, x1 + delta_x))
                    ny1 = int(max(0, y1 + delta_y))
                    # Skip crops that would leave the image.
                    if nx1 + size > width or ny1 + size > height:
                        continue
                    crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
                    Iou = IoU(crop_box, boxes)
                    cropped_im = img[ny1:ny1 + size, nx1:nx1 + size, :]
                    resized_im = cv2.resize(cropped_im, (12, 12),
                                            interpolation=cv2.INTER_LINEAR)
                    if np.max(Iou) < 0.3:
                        save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                        f2.write(save_dir + "/negative/%s.jpg" % n_idx + ' 0\n')
                        cv2.imwrite(save_file, resized_im)
                        n_idx += 1

                # Positive and part faces.
                for i in range(20):
                    # Crop size in [0.8 * min side, 1.25 * max side].
                    size = npr.randint(int(min(w, h) * 0.8),
                                       np.ceil(1.25 * max(w, h)))
                    if w < 5:
                        print(w)
                        continue
                    # Offset of the crop centre relative to the box centre.
                    delta_x = npr.randint(-w * 0.2, w * 0.2)
                    delta_y = npr.randint(-h * 0.2, h * 0.2)
                    nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
                    ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
                    nx2 = nx1 + size
                    ny2 = ny1 + size
                    if nx2 > width or ny2 > height:
                        continue
                    crop_box = np.array([nx1, ny1, nx2, ny2])

                    # Regression targets: ground-truth corners relative to
                    # the crop corners, in units of the crop size.
                    offset_x1 = (x1 - nx1) / float(size)
                    offset_y1 = (y1 - ny1) / float(size)
                    offset_x2 = (x2 - nx2) / float(size)
                    offset_y2 = (y2 - ny2) / float(size)

                    cropped_im = img[ny1:ny2, nx1:nx2, :]
                    resized_im = cv2.resize(cropped_im, (12, 12),
                                            interpolation=cv2.INTER_LINEAR)
                    box_ = box.reshape(1, -1)
                    iou = IoU(crop_box, box_)
                    if iou >= 0.65:
                        save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                        f1.write(save_dir + "/positive/%s.jpg" % p_idx
                                 + ' 1 %.2f %.2f %.2f %.2f\n' % (
                                     offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    elif iou >= 0.4:
                        save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                        f3.write(save_dir + "/part/%s.jpg" % d_idx
                                 + ' -1 %.2f %.2f %.2f %.2f\n' % (
                                     offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1

            if idx % 100 == 0:
                print("%s images done, pos: %s part: %s neg: %s" % (
                    idx, p_idx, d_idx, n_idx))
#print(idx, "images done") height, width, channel = img.shape neg_num = 0 #1---->50 #对每张图片都裁剪50张negative图片作为负样本 while neg_num < 50: size = npr.randint(12, min(width, height) / 2) #top_left coordinate nx = npr.randint(0, width - size) ny = npr.randint(0, height - size) #random crop crop_box = np.array([nx, ny, nx + size, ny + size]) #calculate iou Iou = IoU(crop_box, gt_boxes_single) #crop a part from inital image cropped_im = img[ny:ny + size, nx:nx + size, :] #resize the cropped image to size 12*12 resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR) if np.max(Iou) < 0.3: # Iou with all gts must below 0.3 save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx) f2.write(neg_save_dir + "/%s.jpg" % n_idx + ' 0\n') cv2.imwrite(save_file, resized_im) n_idx += 1 neg_num += 1
def generateData_aug(data_dir, net, argument=False):
    """Generate landmark training crops and the matching label file.

    Annotations come from ``./prepare_data/trainImageList.txt`` with images
    below ``<data_dir>/Align``. Images go to
    ``<data_dir>/<size>/train_<net>_landmark_aug`` and labels to
    ``<data_dir>/<size>/landmark_<size>_aug.txt``.

    For every annotated face the ground-truth crop is resized to the input
    size of ``net`` and its five landmarks are normalised by the box. When
    ``argument`` is true, up to ten randomly shifted crops per face (kept
    when their IoU with the ground truth exceeds 0.65) are added, each
    optionally mirrored and rotated by +5 / -5 degrees. Samples with any
    landmark coordinate <= 0 or >= 1 are discarded.

    NOTE(review): images and labels are only written inside the
    ``argument`` branch; with ``argument=False`` nothing is saved. This
    nesting is inferred from the upstream script -- confirm.

    :param data_dir: dataset root directory
    :param net: one of "PNet", "RNet", "ONet" (input size 12 / 24 / 48)
    :param argument: apply augmentation (and write output) or not
    :return: ``(F_imgs, F_landmarks)`` of the last processed face (empty
        lists when there is no data), or ``None`` for an unknown ``net``
    """
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print("Net type error! ")
        return

    OUTPUT = data_dir + "/%d" % size
    dstdir = data_dir + "/%d/train_%s_landmark_aug" % (size, net)
    # makedirs also creates missing parents and raises if creation fails,
    # which replaces the former mkdir + assert pair.
    os.makedirs(OUTPUT, exist_ok=True)
    os.makedirs(dstdir, exist_ok=True)

    # Each entry is (image path, bounding box, five landmark points).
    data = getDataFromTxt("./prepare_data/trainImageList.txt",
                          data_path=data_dir + '/Align')

    image_id = 0
    idx = 0
    # Defined up front so the final return works even when `data` is empty.
    F_imgs, F_landmarks = [], []

    with open(join(OUTPUT, "landmark_%s_aug.txt" % (size)), 'w') as f:
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)
            assert (img is not None)
            img_h, img_w, img_c = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])

            # Ground-truth face, resized to the network input size.
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # Landmarks relative to the box corner, divided by the box extent.
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = (
                    (one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                    (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if not argument:
                continue

            idx = idx + 1
            if idx % 100 == 0:
                print(idx, "images done")
            x1, y1, x2, y2 = gt_box
            gt_w = x2 - x1 + 1
            gt_h = y2 - y1 + 1
            # Small faces and boxes starting outside the image are skipped
            # entirely (nothing is written for them).
            if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                continue

            # Random shift / rescale around the ground-truth centre.
            for _ in range(10):
                bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                        np.ceil(1.25 * max(gt_w, gt_h)))
                delta_x = npr.randint(-gt_w * 0.2, gt_w * 0.2)
                delta_y = npr.randint(-gt_h * 0.2, gt_h * 0.2)
                nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                nx2 = nx1 + bbox_size
                ny2 = ny1 + bbox_size
                if nx2 > img_w or ny2 > img_h:
                    continue

                crop_box = np.array([nx1, ny1, nx2, ny2])
                cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                resized_im = cv2.resize(cropped_im, (size, size))
                iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                if not iou > 0.65:
                    continue

                F_imgs.append(resized_im)
                # Landmarks normalised by the shifted square crop.
                landmark = np.zeros((5, 2))
                for index, one in enumerate(landmarkGt):
                    landmark[index] = ((one[0] - nx1) / bbox_size,
                                       (one[1] - ny1) / bbox_size)
                F_landmarks.append(landmark.reshape(10))
                landmark_ = F_landmarks[-1].reshape(-1, 2)
                crop_bbox = BBox([nx1, ny1, nx2, ny2])

                # Mirror, applied with probability 1/2.
                if random.choice([0, 1]) > 0:
                    face_flipped, landmark_flipped = flip(resized_im, landmark_)
                    face_flipped = cv2.resize(face_flipped, (size, size))
                    F_imgs.append(face_flipped)
                    F_landmarks.append(landmark_flipped.reshape(10))

                # Rotation by +5 and then -5 degrees, each with probability
                # 1/2, each followed by a mirrored copy.
                # NOTE(review): the original comments call +5 counter-clockwise
                # and -5 clockwise -- confirm against rotate().
                for alpha in (5, -5):
                    if random.choice([0, 1]) > 0:
                        face_rotated_by_alpha, landmark_rotated = rotate(
                            img, crop_bbox,
                            crop_bbox.reprojectLandmark(landmark_), alpha)
                        landmark_rotated = crop_bbox.projectLandmark(landmark_rotated)
                        face_rotated_by_alpha = cv2.resize(
                            face_rotated_by_alpha, (size, size))
                        F_imgs.append(face_rotated_by_alpha)
                        F_landmarks.append(landmark_rotated.reshape(10))

                        face_flipped, landmark_flipped = flip(
                            face_rotated_by_alpha, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for i in range(len(F_imgs)):
                # Drop the sample if any coordinate is <= 0 or >= 1.
                if np.any(F_landmarks[i] <= 0) or np.any(F_landmarks[i] >= 1):
                    continue
                cv2.imwrite(join(dstdir, "%d.jpg" % (image_id)), F_imgs[i])
                landmarks = map(str, list(F_landmarks[i]))
                f.write(dstdir + "/%d.jpg" % (image_id)
                        + " -2 " + " ".join(landmarks) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks