def GenerateData(ftxt, data_path, output, net, argument=False):
    """Create landmark training samples for one of the cascade networks.

    For every record returned by ``getDataFromTxt`` the ground-truth face box
    is cropped from the image, resized to the network input size and stored
    together with its five landmarks normalised to the crop.  When
    ``argument`` is true, randomly shifted, flipped and rotated variants are
    added as well.  Accepted samples are written as ``<image_id>.jpg`` and
    listed in ``landmark_<size>.txt`` with the label ``-2``.

    :param ftxt: annotation file handed to ``getDataFromTxt``
    :param data_path: second argument handed to ``getDataFromTxt``
    :param output: directory that receives ``landmark_<size>.txt``
    :param net: "PNet", "RNet" or "ONet" (input size 12, 24 or 48)
    :param argument: apply data augmentation or not
    :return: ``(images, landmarks)`` collected for the last processed record
        (empty lists when there are no records); ``None`` for an unknown net

    The image directory ``dstdir`` is not defined in this function; it is
    expected to exist at module level.
    """
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print('Net type error')
        return

    image_id = 0
    idx = 0
    # Bound up front so the final return also works when there are no records.
    F_imgs, F_landmarks = [], []

    # The context manager closes the annotation file even if a record fails.
    # The `output` argument (not a global) selects the target directory.
    with open(join(output, "landmark_%s.txt" % (size)), 'w') as f:
        data = getDataFromTxt(ftxt, data_path)
        # each record: image path, bounding box, landmarks (5 x 2)
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)
            assert (img is not None)
            img_h, img_w, _ = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])

            # ground-truth face crop, resized to the network input size
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # landmarks relative to the ground-truth box, scaled by its extent
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = (
                    (one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                    (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if argument:
                idx = idx + 1
                if idx % 100 == 0:
                    print(idx, "images done")
                x1, y1, x2, y2 = gt_box
                gt_w = x2 - x1 + 1
                gt_h = y2 - y1 + 1
                # faces that are too small or start outside the image are
                # skipped entirely (nothing is saved for them)
                if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                    continue

                # ten random shift attempts per face
                for _attempt in range(10):
                    # NOTE(review): float bounds are passed to randint exactly
                    # as before; confirm the installed NumPy accepts them.
                    bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                            np.ceil(1.25 * max(gt_w, gt_h)))
                    delta_x = npr.randint(-gt_w * 0.2, gt_w * 0.2)
                    delta_y = npr.randint(-gt_h * 0.2, gt_h * 0.2)
                    nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                    ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                    nx2 = nx1 + bbox_size
                    ny2 = ny1 + bbox_size
                    if nx2 > img_w or ny2 > img_h:
                        continue

                    crop_box = np.array([nx1, ny1, nx2, ny2])
                    iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                    if not iou > 0.65:
                        continue

                    cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                    resized_im = cv2.resize(cropped_im, (size, size))
                    F_imgs.append(resized_im)

                    # landmarks relative to the shifted square crop
                    shifted = np.zeros((5, 2))
                    for index, one in enumerate(landmarkGt):
                        shifted[index] = ((one[0] - nx1) / bbox_size,
                                          (one[1] - ny1) / bbox_size)
                    F_landmarks.append(shifted.reshape(10))
                    landmark_ = F_landmarks[-1].reshape(-1, 2)
                    # separate name: do not rebind the loop variable `bbox`
                    crop_bbox = BBox([nx1, ny1, nx2, ny2])

                    # mirror, applied with probability 1/2
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(
                            resized_im, landmark_)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

                    # rotation by +5 and then -5 degrees, each applied with
                    # probability 1/2 and each followed by a flipped copy
                    # (direction of a positive angle is defined by `rotate`)
                    for angle in (5, -5):
                        if random.choice([0, 1]) > 0:
                            face_rotated, landmark_rotated = rotate(
                                img, crop_bbox,
                                crop_bbox.reprojectLandmark(landmark_), angle)
                            landmark_rotated = crop_bbox.projectLandmark(
                                landmark_rotated)
                            face_rotated = cv2.resize(
                                face_rotated, (size, size))
                            F_imgs.append(face_rotated)
                            F_landmarks.append(landmark_rotated.reshape(10))

                            face_flipped, landmark_flipped = flip(
                                face_rotated, landmark_rotated)
                            face_flipped = cv2.resize(
                                face_flipped, (size, size))
                            F_imgs.append(face_flipped)
                            F_landmarks.append(landmark_flipped.reshape(10))

            # NOTE(review): samples are saved for every record, not only when
            # `argument` is set; confirm this matches the intended nesting.
            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for face, points in zip(F_imgs, F_landmarks):
                # drop samples with any coordinate outside the open (0, 1)
                if np.any(points <= 0) or np.any(points >= 1):
                    continue
                img_file = join(dstdir, "%d.jpg" % (image_id))
                cv2.imwrite(img_file, face)
                f.write(img_file + " -2 " + " ".join(map(str, points)) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks
def GenerateData(ftxt, data_path, net, argument=False):
    '''
    Create landmark training samples for one of the cascade networks.

    Each record's ground-truth face is cropped, resized to the network input
    size and stored with its five landmarks normalised to the crop; with
    ``argument`` set, shifted, flipped and rotated variants are added.
    Accepted samples are written as ``<image_id>.jpg`` and listed in
    ``landmark_<size>_aug.txt`` with the label ``-2``.

    The output directory ``OUTPUT`` and the image directory ``dstdir`` are
    not defined in this function; they are expected to exist at module level.

    :param ftxt: name/path of the text file that contains image path,
        bounding box, and landmarks
    :param data_path: passed to ``getDataFromTxt`` as ``data_path``
    :param net: one of the net in the cascaded networks
        ("PNet", "RNet" or "ONet")
    :param argument: apply augmentation or not
    :return: images and related landmarks of the last processed record
        (empty lists when there are no records); None for an unknown net
    '''
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print('Net type error')
        return

    image_id = 0
    idx = 0
    # Bound up front so the final return also works when there are no records.
    F_imgs, F_landmarks = [], []

    # The context manager closes the annotation file even if a record fails.
    with open(join(OUTPUT, "landmark_%s_aug.txt" % (size)), 'w') as f:
        # get image path, bounding box (4 values) and landmarks (5 x 2)
        # from file 'ftxt'
        data = getDataFromTxt(ftxt, data_path=data_path)
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)
            assert (img is not None)
            img_h, img_w, _ = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])

            # get the sub-image framed by bbox and resize it to the
            # specified size
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # normalize the landmarks:
            # ((x - bbox.left) / box extent in x, (y - bbox.top) / extent in y)
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = (
                    (one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                    (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if argument:
                # data-set expansion
                idx = idx + 1
                if idx % 100 == 0:
                    print(idx, "images done")
                x1, y1, x2, y2 = gt_box
                gt_w = x2 - x1 + 1
                gt_h = y2 - y1 + 1
                # box size limit: small or out-of-image faces are skipped
                # entirely (nothing is saved for them)
                if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                    continue

                # ten random shift attempts per face
                for _attempt in range(10):
                    # side length of the random square crop
                    # NOTE(review): float bounds are passed to randint exactly
                    # as before; confirm the installed NumPy accepts them.
                    bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                            np.ceil(1.25 * max(gt_w, gt_h)))
                    delta_x = npr.randint(-gt_w * 0.2, gt_w * 0.2)
                    delta_y = npr.randint(-gt_h * 0.2, gt_h * 0.2)
                    nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                    ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                    nx2 = nx1 + bbox_size
                    ny2 = ny1 + bbox_size
                    if nx2 > img_w or ny2 > img_h:
                        continue

                    crop_box = np.array([nx1, ny1, nx2, ny2])
                    iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                    if not iou > 0.65:
                        continue

                    cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                    resized_im = cv2.resize(cropped_im, (size, size))
                    F_imgs.append(resized_im)

                    # landmarks relative to the shifted square crop
                    shifted = np.zeros((5, 2))
                    for index, one in enumerate(landmarkGt):
                        shifted[index] = ((one[0] - nx1) / bbox_size,
                                          (one[1] - ny1) / bbox_size)
                    F_landmarks.append(shifted.reshape(10))
                    landmark_ = F_landmarks[-1].reshape(-1, 2)
                    # separate name: do not rebind the loop variable `bbox`
                    crop_bbox = BBox([nx1, ny1, nx2, ny2])

                    # mirror, applied with probability 1/2
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(
                            resized_im, landmark_)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

                    # rotation by +5 and then -5 degrees, each applied with
                    # probability 1/2 and each followed by a flipped copy
                    # (direction of a positive angle is defined by `rotate`)
                    for angle in (5, -5):
                        if random.choice([0, 1]) > 0:
                            face_rotated, landmark_rotated = rotate(
                                img, crop_bbox,
                                crop_bbox.reprojectLandmark(landmark_), angle)
                            landmark_rotated = crop_bbox.projectLandmark(
                                landmark_rotated)
                            face_rotated = cv2.resize(
                                face_rotated, (size, size))
                            F_imgs.append(face_rotated)
                            F_landmarks.append(landmark_rotated.reshape(10))

                            face_flipped, landmark_flipped = flip(
                                face_rotated, landmark_rotated)
                            face_flipped = cv2.resize(
                                face_flipped, (size, size))
                            F_imgs.append(face_flipped)
                            F_landmarks.append(landmark_flipped.reshape(10))

            # NOTE(review): samples are saved for every record, not only when
            # `argument` is set; confirm this matches the intended nesting.
            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for face, points in zip(F_imgs, F_landmarks):
                # drop samples with any coordinate outside the open (0, 1)
                if np.any(points <= 0) or np.any(points >= 1):
                    continue
                img_file = join(dstdir, "%d.jpg" % (image_id))
                cv2.imwrite(img_file, face)
                f.write(img_file + " -2 " + " ".join(map(str, points)) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks
def generateData_aug(data_dir, net, argument=False):
    """Create landmark training samples below ``data_dir`` for one network.

    Records are read with ``getDataFromTxt`` from
    ``./prepare_data/trainImageList.txt`` (images under ``<data_dir>/Align``).
    Each ground-truth face is cropped, resized to the network input size and
    stored with its five landmarks normalised to the crop; with ``argument``
    set, shifted, flipped and rotated variants are added.  Accepted samples
    go to ``<data_dir>/<size>/train_<net>_landmark_aug/<image_id>.jpg`` and
    are listed in ``<data_dir>/<size>/landmark_<size>_aug.txt`` with the
    label ``-2``.

    :param data_dir: root directory for input images and generated output
    :param net: "PNet", "RNet" or "ONet" (input size 12, 24 or 48)
    :param argument: apply data augmentation or not
    :return: ``(images, landmarks)`` collected for the last processed record
        (empty lists when there are no records); ``None`` for an unknown net
    """
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print("Net type error! ")
        return

    OUTPUT = data_dir + "/%d" % size
    if not exists(OUTPUT):
        os.mkdir(OUTPUT)
    dstdir = data_dir + "/%d/train_%s_landmark_aug" % (size, net)
    if not exists(dstdir):
        os.mkdir(dstdir)
    assert (exists(dstdir) and exists(OUTPUT))

    # get image path, bounding box, and landmarks from the annotation file
    data = getDataFromTxt("./prepare_data/trainImageList.txt",
                          data_path=data_dir + '/Align')

    image_id = 0
    idx = 0
    # Bound up front so the final return also works when there are no records.
    F_imgs, F_landmarks = [], []

    # The context manager closes the annotation file even if a record fails.
    with open(join(OUTPUT, "landmark_%s_aug.txt" % (size)), 'w') as f:
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)
            assert (img is not None)
            img_h, img_w, _ = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])

            # ground-truth face crop, resized to the network input size
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # normalize the landmarks:
            # ((x - bbox.left) / box extent in x, (y - bbox.top) / extent in y)
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = (
                    (one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                    (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if argument:
                idx = idx + 1
                if idx % 100 == 0:
                    print(idx, "images done")
                x1, y1, x2, y2 = gt_box
                gt_w = x2 - x1 + 1
                gt_h = y2 - y1 + 1
                # faces that are too small or start outside the image are
                # skipped entirely (nothing is saved for them)
                if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                    continue

                # ten random shift attempts per face
                for _attempt in range(10):
                    # NOTE(review): float bounds are passed to randint exactly
                    # as before; confirm the installed NumPy accepts them.
                    bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                            np.ceil(1.25 * max(gt_w, gt_h)))
                    delta_x = npr.randint(-gt_w * 0.2, gt_w * 0.2)
                    delta_y = npr.randint(-gt_h * 0.2, gt_h * 0.2)
                    nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                    ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                    nx2 = nx1 + bbox_size
                    ny2 = ny1 + bbox_size
                    if nx2 > img_w or ny2 > img_h:
                        continue

                    crop_box = np.array([nx1, ny1, nx2, ny2])
                    iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                    if not iou > 0.65:
                        continue

                    cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                    resized_im = cv2.resize(cropped_im, (size, size))
                    F_imgs.append(resized_im)

                    # landmarks relative to the shifted square crop
                    shifted = np.zeros((5, 2))
                    for index, one in enumerate(landmarkGt):
                        shifted[index] = ((one[0] - nx1) / bbox_size,
                                          (one[1] - ny1) / bbox_size)
                    F_landmarks.append(shifted.reshape(10))
                    landmark_ = F_landmarks[-1].reshape(-1, 2)
                    # separate name: do not rebind the loop variable `bbox`
                    crop_bbox = BBox([nx1, ny1, nx2, ny2])

                    # mirror, applied with probability 1/2
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(
                            resized_im, landmark_)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

                    # rotation by +5 and then -5 degrees, each applied with
                    # probability 1/2 and each followed by a flipped copy
                    # (direction of a positive angle is defined by `rotate`)
                    for angle in (5, -5):
                        if random.choice([0, 1]) > 0:
                            face_rotated, landmark_rotated = rotate(
                                img, crop_bbox,
                                crop_bbox.reprojectLandmark(landmark_), angle)
                            landmark_rotated = crop_bbox.projectLandmark(
                                landmark_rotated)
                            face_rotated = cv2.resize(
                                face_rotated, (size, size))
                            F_imgs.append(face_rotated)
                            F_landmarks.append(landmark_rotated.reshape(10))

                            face_flipped, landmark_flipped = flip(
                                face_rotated, landmark_rotated)
                            face_flipped = cv2.resize(
                                face_flipped, (size, size))
                            F_imgs.append(face_flipped)
                            F_landmarks.append(landmark_flipped.reshape(10))

            # NOTE(review): samples are saved for every record, not only when
            # `argument` is set; confirm this matches the intended nesting.
            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for face, points in zip(F_imgs, F_landmarks):
                # drop samples with any coordinate outside the open (0, 1)
                if np.any(points <= 0) or np.any(points >= 1):
                    continue
                cv2.imwrite(join(dstdir, "%d.jpg" % (image_id)), face)
                f.write(dstdir + "/%d.jpg" % (image_id) + " -2 " +
                        " ".join(map(str, points)) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks
def GenerateData(ftxt, data_path, output_path, img_output_path, net,
                 argument=False):
    '''
    Create landmark training samples for one of the cascade networks.

    Each record's ground-truth face is cropped, resized to the network input
    size and stored with its five landmarks normalised to the crop; with
    ``argument`` set, shifted, flipped and rotated variants are added.
    Accepted samples are written as ``<image_id>.jpg`` and listed in
    ``landmark_<size>_aug.txt`` with the label ``-2``.

    Parameters
    ------------
        ftxt: path of anno file
        data_path: directory that holds the data set
        output_path: directory that receives the annotation text file
        img_output_path: directory that receives the generated images
        net: String, name of one of the three networks
            ("PNet", "RNet" or "ONet")
        argument: whether to apply data augmentation
    Returns
    -------------
        images and related landmarks of the last processed record (empty
        lists when there are no records); None for an unknown net
    '''
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print('Net type error')
        return

    image_id = 0
    idx = 0
    # Bound up front so the final return also works when there are no records.
    F_imgs, F_landmarks = [], []

    # The context manager closes the annotation file even if a record fails.
    with open(join(output_path, "landmark_%s_aug.txt" % (size)), 'w') as f:
        # get image path, bounding box, and landmarks from file 'ftxt'
        data = getDataFromTxt(ftxt, data_path=data_path)
        # each record: image path, bbox, landmarks (5 x 2)
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)
            assert (img is not None)
            img_h, img_w, _ = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])

            # get the sub-image from bbox and resize it to the specified size
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # recompute the landmark coordinates relative to the crop and
            # normalize them:
            # (x - bbox.left) / box extent in x, (y - bbox.top) / extent in y
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = (
                    (one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                    (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))
            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))  # [x1, y1, x2, y2, ...]

            # data augment
            if argument:
                idx = idx + 1
                if idx % 100 == 0:
                    print(idx, "images done")
                x1, y1, x2, y2 = gt_box
                gt_w = x2 - x1 + 1
                gt_h = y2 - y1 + 1
                # faces that are too small or start outside the image are
                # skipped entirely (nothing is saved for them)
                if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                    continue

                # ten random shift attempts per face
                for _attempt in range(10):
                    # NOTE(review): float bounds are passed to randint exactly
                    # as before; confirm the installed NumPy accepts them.
                    bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                            np.ceil(1.25 * max(gt_w, gt_h)))
                    delta_x = npr.randint(-gt_w * 0.2, gt_w * 0.2)
                    delta_y = npr.randint(-gt_h * 0.2, gt_h * 0.2)
                    nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                    ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))
                    nx2 = nx1 + bbox_size
                    ny2 = ny1 + bbox_size
                    if nx2 > img_w or ny2 > img_h:
                        continue

                    crop_box = np.array([nx1, ny1, nx2, ny2])
                    iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                    if not iou > 0.65:
                        continue

                    cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                    resized_im = cv2.resize(cropped_im, (size, size))
                    F_imgs.append(resized_im)

                    # landmarks relative to the shifted square crop
                    shifted = np.zeros((5, 2))
                    for index, one in enumerate(landmarkGt):
                        shifted[index] = ((one[0] - nx1) / bbox_size,
                                          (one[1] - ny1) / bbox_size)
                    F_landmarks.append(shifted.reshape(10))
                    landmark_ = F_landmarks[-1].reshape(-1, 2)
                    # separate name: do not rebind the loop variable `bbox`
                    crop_bbox = BBox([nx1, ny1, nx2, ny2])

                    # mirror, applied with probability 1/2
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(
                            resized_im, landmark_)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

                    # rotation by +5 and then -5 degrees, each applied with
                    # probability 1/2 and each followed by a flipped copy
                    # (direction of a positive angle is defined by `rotate`)
                    for angle in (5, -5):
                        if random.choice([0, 1]) > 0:
                            face_rotated, landmark_rotated = rotate(
                                img, crop_bbox,
                                crop_bbox.reprojectLandmark(landmark_), angle)
                            landmark_rotated = crop_bbox.projectLandmark(
                                landmark_rotated)
                            face_rotated = cv2.resize(
                                face_rotated, (size, size))
                            F_imgs.append(face_rotated)
                            F_landmarks.append(landmark_rotated.reshape(10))

                            face_flipped, landmark_flipped = flip(
                                face_rotated, landmark_rotated)
                            face_flipped = cv2.resize(
                                face_flipped, (size, size))
                            F_imgs.append(face_flipped)
                            F_landmarks.append(landmark_flipped.reshape(10))

            # NOTE(review): samples are saved for every record, not only when
            # `argument` is set; confirm this matches the intended nesting.
            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for face, points in zip(F_imgs, F_landmarks):
                # discard the sample as soon as one coordinate is <= 0 or >= 1
                if np.any(points <= 0) or np.any(points >= 1):
                    continue
                # The annotation must name the file that was actually written,
                # so the same `img_output_path` location is used for both.
                img_file = join(img_output_path, "%d.jpg" % (image_id))
                cv2.imwrite(img_file, face)
                f.write(img_file + " -2 " + " ".join(map(str, points)) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks
def generate_landmark_data(landmark_truth_txt_path, images_dir, net,
                           argument=False):
    """
    Generate landmark training samples (label=-2) for one network type.

    The ground-truth box of every record is converted to a square with
    ``convert_to_square``, shifted back inside the image, cropped, resized to
    the network input size and stored with its five landmarks normalised to
    that square.  With ``argument`` set, randomly shifted, flipped and rotated
    variants are added.  Accepted samples are written as ``<image_id>.jpg``
    into the network's landmark directory and listed in its landmark text
    file; both locations come from ``path_config``.

    :param landmark_truth_txt_path: path of the txt file holding image path,
        bounding box, and landmarks
    :param images_dir: directory of the images
    :param net: network type, one of ('PNet', 'RNet', 'ONet')
    :param argument: whether to apply data augmentation
    :return: None
    :raises ValueError: if ``net`` is not one of the three network types
    """
    if net == "PNet":
        size = 12
        landmark_dir = path_config.pnet_landmark_dir
        net_data_root_dir = path_config.pnet_dir
        landmark_txt_path = path_config.pnet_landmark_txt_path
    elif net == "RNet":
        size = 24
        landmark_dir = path_config.rnet_landmark_dir
        net_data_root_dir = path_config.rnet_dir
        landmark_txt_path = path_config.rnet_landmark_txt_path
    elif net == "ONet":
        size = 48
        landmark_dir = path_config.onet_landmark_dir
        net_data_root_dir = path_config.onet_dir
        landmark_txt_path = path_config.onet_landmark_txt_path
    else:
        raise ValueError('网络类型(--net)错误!')

    if not os.path.exists(net_data_root_dir):
        os.mkdir(net_data_root_dir)
    if not os.path.exists(landmark_dir):
        os.mkdir(landmark_dir)

    # read the landmark annotation: image path, bounding box, and landmarks
    data = get_landmark_data(landmark_truth_txt_path, images_dir)

    landmark_idx = 0
    image_id = 0
    # The annotation file is opened only after the directories exist and the
    # records were read, and the context manager closes it on any error.
    with open(landmark_txt_path, 'w') as landmark_file:
        # generate the landmark training data image by image
        for (imgPath, bbox, landmarkGt) in data:
            # cropped image data and the landmark positions inside each crop
            cropped_images = []
            cropped_landmarks = []
            img = cv2.imread(imgPath)
            assert (img is not None)
            image_height, image_width, _ = img.shape
            gt_box = np.array(
                [[bbox.left, bbox.top, bbox.right, bbox.bottom]])
            square_gt_box = np.squeeze(convert_to_square(gt_box))

            # keep the box inside the image while preserving its square shape
            if square_gt_box[0] < 0:
                square_gt_box[2] -= square_gt_box[0]
                square_gt_box[0] = 0
            if square_gt_box[1] < 0:
                square_gt_box[3] -= square_gt_box[1]
                square_gt_box[1] = 0
            if square_gt_box[2] > image_width:
                square_gt_box[0] -= (square_gt_box[2] - image_width)
                square_gt_box[2] = image_width
            if square_gt_box[3] > image_height:
                square_gt_box[1] -= (square_gt_box[3] - image_height)
                square_gt_box[3] = image_height
            gt_box = np.squeeze(gt_box)

            # normalized landmark coordinates:
            # ((x - box.left) / box extent in x, (y - box.top) / extent in y)
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = (
                    (one[0] - square_gt_box[0]) /
                    (square_gt_box[2] - square_gt_box[0]),
                    (one[1] - square_gt_box[1]) /
                    (square_gt_box[3] - square_gt_box[1]))
            cropped_landmarks.append(landmark.reshape(10))

            # crop the target region
            cropped_object_image = img[square_gt_box[1]:square_gt_box[3] + 1,
                                       square_gt_box[0]:square_gt_box[2] + 1]
            cropped_object_image = cv2.resize(cropped_object_image,
                                              (size, size))
            cropped_images.append(cropped_object_image)

            if argument:
                landmark_idx = landmark_idx + 1
                if landmark_idx % 100 == 0:
                    sys.stdout.write("\r{}/{} images done ...".format(
                        landmark_idx, len(data)))

                # coordinates, width and height of the ground truth
                (x_truth_left, y_truth_top,
                 x_truth_right, y_truth_bottom) = gt_box
                width_truth = x_truth_right - x_truth_left + 1
                height_truth = y_truth_bottom - y_truth_top + 1
                # faces that are too small or start outside the image are
                # skipped entirely (nothing is saved for them)
                if (max(width_truth, height_truth) < 20 or x_truth_left < 0
                        or y_truth_top < 0):
                    continue

                # random shifts: stop after 10 accepted crops or 100 rejected
                # attempts
                shift_num = 0
                shift_try = 0
                while shift_num < 10 and shift_try < 100:
                    bbox_size = npr.randint(
                        int(min(width_truth, height_truth) * 0.8),
                        np.ceil(1.25 * max(width_truth, height_truth)))
                    delta_x = npr.randint(int(-width_truth * 0.2),
                                          np.ceil(width_truth * 0.2))
                    delta_y = npr.randint(int(-height_truth * 0.2),
                                          np.ceil(height_truth * 0.2))
                    x_left_shift = int(max(
                        x_truth_left + width_truth / 2 - bbox_size / 2 +
                        delta_x, 0))
                    y_top_shift = int(max(
                        y_truth_top + height_truth / 2 - bbox_size / 2 +
                        delta_y, 0))
                    x_right_shift = x_left_shift + bbox_size
                    y_bottom_shift = y_top_shift + bbox_size
                    if (x_right_shift > image_width
                            or y_bottom_shift > image_height):
                        shift_try += 1
                        continue

                    crop_box = np.array([x_left_shift, y_top_shift,
                                         x_right_shift, y_bottom_shift])
                    # square-corrected IoU between the shifted region and the
                    # ground truth
                    iou = square_IoU(crop_box, np.expand_dims(gt_box, 0))
                    if not iou > 0.65:
                        shift_try += 1
                        continue

                    shift_num += 1
                    cropped_im = img[y_top_shift:y_bottom_shift + 1,
                                     x_left_shift:x_right_shift + 1, :]
                    resized_im = cv2.resize(cropped_im, (size, size))
                    cropped_images.append(resized_im)

                    # landmarks relative to the shifted square crop
                    shifted = np.zeros((5, 2))
                    for index, one in enumerate(landmarkGt):
                        shifted[index] = (
                            (one[0] - x_left_shift) / bbox_size,
                            (one[1] - y_top_shift) / bbox_size)
                    cropped_landmarks.append(shifted.reshape(10))

                    # further augmentation of the accepted crop
                    landmark_ = cropped_landmarks[-1].reshape(-1, 2)
                    # separate name: do not rebind the loop variable `bbox`
                    crop_bbox = BBox([x_left_shift, y_top_shift,
                                      x_right_shift, y_bottom_shift])

                    # mirror, applied with probability 1/2
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(
                            resized_im, landmark_)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        cropped_images.append(face_flipped)
                        cropped_landmarks.append(
                            landmark_flipped.reshape(10))

                    # rotation by +5 and then -5 degrees, each applied with
                    # probability 1/2 and each followed by a flipped copy
                    # NOTE(review): which sign is clockwise and which axis
                    # `flip` uses is decided by those helpers — confirm.
                    for angle in (5, -5):
                        if random.choice([0, 1]) > 0:
                            face_rotated, landmark_rotated = rotate(
                                img, crop_bbox,
                                crop_bbox.reprojectLandmark(landmark_), angle)
                            landmark_rotated = crop_bbox.projectLandmark(
                                landmark_rotated)
                            face_rotated = cv2.resize(
                                face_rotated, (size, size))
                            cropped_images.append(face_rotated)
                            cropped_landmarks.append(
                                landmark_rotated.reshape(10))

                            face_flipped, landmark_flipped = flip(
                                face_rotated, landmark_rotated)
                            face_flipped = cv2.resize(
                                face_flipped, (size, size))
                            cropped_images.append(face_flipped)
                            cropped_landmarks.append(
                                landmark_flipped.reshape(10))

            # save the landmark training images and their coordinates
            # NOTE(review): samples are saved for every record, not only when
            # `argument` is set; confirm this matches the intended nesting.
            cropped_images = np.asarray(cropped_images)
            cropped_landmarks = np.asarray(cropped_landmarks)
            for face, points in zip(cropped_images, cropped_landmarks):
                # drop samples with any coordinate outside [0, 1]
                if np.any(points < 0) or np.any(points > 1):
                    continue
                img_file = os.path.join(landmark_dir, "%d.jpg" % image_id)
                cv2.imwrite(img_file, face)
                landmark_file.write(
                    img_file + " -2 " + " ".join(map(str, points)) + "\n")
                image_id = image_id + 1

    return