def save_hard_example(net, data, save_path):
    """Sort the previous stage's detections into neg/pos/part training crops.

    Every detection is squared, cropped from its source image, resized to
    ``image_size`` x ``image_size`` and classified by its highest IoU against
    the ground-truth boxes of that image:

    * IoU < 0.3   -> negative (label ``0``), at most 60 per image
    * IoU >= 0.65 -> positive (label ``1``) plus four bbox regression offsets
    * IoU >= 0.4  -> part face (label ``-1``) plus four bbox regression offsets

    All other detections are dropped.

    Relies on names defined outside this function: ``image_size``,
    ``neg_dir``, ``pos_dir``, ``part_dir``, ``get_path``, ``IoU`` and
    ``convert_to_square``.

    :param net: value formatted with ``%d`` into the label directory name
    :param data: mapping with ``'images'`` (image paths) and ``'bboxes'``
        (ground-truth boxes per image, reshaped here to ``(-1, 4)``)
    :param save_path: directory that contains ``detections.pkl``
    :raises AssertionError: if the number of detection entries differs from
        the number of images
    """
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)

    print("processing %d images in total" % num_of_images)

    # label files, one per sample class
    neg_label_file = "../../DATA/no_LM%d/neg_%d.txt" % (net, image_size)
    pos_label_file = "../../DATA/no_LM%d/pos_%d.txt" % (net, image_size)
    part_label_file = "../../DATA/no_LM%d/part_%d.txt" % (net, image_size)

    # Context managers guarantee the label files are flushed and closed even
    # when an image or a detection entry raises part-way through.
    with open(neg_label_file, 'w') as neg_file, \
            open(pos_label_file, 'w') as pos_file, \
            open(part_label_file, 'w') as part_file:
        # read detect result
        # NOTE: pickle.load can execute arbitrary code; only point save_path
        # at detection files produced by a trusted run.
        with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_file:
            det_boxes = pickle.load(det_file)
        print(len(det_boxes))
        print(num_of_images)
        assert len(det_boxes) == num_of_images, \
            "incorrect detections or ground truths"

        # index of neg, pos and part face, used as their image names
        n_idx = 0
        p_idx = 0
        d_idx = 0
        image_done = 0
        for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
            if image_done % 100 == 0:
                print("%d images done" % image_done)
            image_done += 1

            # nothing was detected on this image
            if dets.shape[0] == 0:
                continue
            img = cv2.imread(im_idx)
            if img is None:
                # cv2.imread signals an unreadable/missing file with None;
                # skip it instead of crashing on img.shape below.
                print("skipping unreadable image: %s" % im_idx)
                continue

            # change to square
            dets = convert_to_square(dets)
            dets[:, 0:4] = np.round(dets[:, 0:4])
            neg_num = 0  # negatives saved for this image (capped at 60)
            for box in dets:
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # ignore box that is too small or beyond image border
                if (width < 20 or x_left < 0 or y_top < 0
                        or x_right > img.shape[1] - 1
                        or y_bottom > img.shape[0] - 1):
                    continue

                # IoU between current box and all gt boxes
                # NOTE(review): np.max raises on an empty IoU result --
                # confirm every image has at least one ground-truth box.
                Iou = IoU(box, gts)
                max_iou = np.max(Iou)
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3 and neg_num < 60:
                    # negative: IoU with all gts is below 0.3
                    save_file = get_path(neg_dir, "%s.jpg" % n_idx)
                    neg_file.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    neg_num += 1
                elif max_iou >= 0.4:
                    # regress towards the gt box with the highest IoU
                    x1, y1, x2, y2 = gts[np.argmax(Iou)]

                    # bbox regression label, relative to the squared box
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)
                    offsets = (offset_x1, offset_y1, offset_x2, offset_y2)

                    if max_iou >= 0.65:
                        save_file = get_path(pos_dir, "%s.jpg" % p_idx)
                        pos_file.write(
                            save_file + ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    else:
                        save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                        part_file.write(
                            save_file + ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1
def save_hard_example(net):
    """Build neg/pos/part training crops for ``net`` from saved detections.

    Reads the annotation list and ``detections.pkl`` from hard-coded
    locations, squares every detection, crops and resizes it to the input
    size of ``net`` (24 for "rnet", 48 for "onet") and classifies it by its
    highest IoU against the ground-truth boxes of the image:

    * IoU < 0.3   -> negative (label ``0``)
    * IoU >= 0.65 -> positive (label ``1``) plus four bbox regression offsets
    * IoU >= 0.4  -> part face (label ``-1``) plus four bbox regression offsets

    Crops are written to the hard-coded ``.../images/24/{negative,positive,
    part}`` directories; label lines go to ``pos_/neg_/part_<size>.txt``
    under the per-net ``prepare_data`` directory.

    Relies on ``IoU`` and ``convert_to_square`` defined outside this function.

    :param net: "rnet" or "onet"
    :raises ValueError: if ``net`` is neither "rnet" nor "onet"
    :raises AssertionError: if the number of detection entries differs from
        the number of annotation lines
    """
    # Output directories for the crops.
    # NOTE(review): these point at the "24" tree even when net == "onet"
    # (48x48 crops) -- confirm that is intended.
    neg_save_dir = "/Users/qiuxiaocong/Downloads/mtcnn1/images/24/negative"
    pos_save_dir = "/Users/qiuxiaocong/Downloads/mtcnn1/images/24/positive"
    part_save_dir = "/Users/qiuxiaocong/Downloads/mtcnn1/images/24/part"

    # Fail fast on an unknown network instead of hitting an unbound
    # image_size further down.
    if net == "rnet":
        image_size = 24
    elif net == "onet":
        image_size = 48
    else:
        raise ValueError(
            "unsupported net %r; expected 'rnet' or 'onet'" % (net,))

    # load ground truth from annotation file
    # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image
    anno_file = '/Users/qiuxiaocong/Downloads/mtcnn1/imglists/anno_demo.txt'
    with open(anno_file, 'r') as f:
        annotations = f.readlines()  # one annotation line per image

    num_of_images = len(annotations)
    print("processing %d images in total" % num_of_images)

    im_idx_list = []    # path of every training image
    gt_boxes_list = []  # all ground-truth boxes of every training image
    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_idx_list.append(annotation[0])
        boxes = list(map(float, annotation[1:]))
        gt_boxes_list.append(
            np.array(boxes, dtype=np.float32).reshape(-1, 4))

    # directory holding the detections and the label files for this net
    save_path = "/Users/qiuxiaocong/Downloads/mtcnn1/prepare_data/%s" % net

    # Context managers guarantee the label files are flushed and closed even
    # when processing raises part-way through.
    with open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w') as f1, \
            open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w') as f2, \
            open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w') as f3:
        # load the region proposals produced by the previous stage
        # NOTE: pickle.load can execute arbitrary code; only load detection
        # files produced by a trusted run.
        with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_file:
            det_boxes = pickle.load(det_file)
        assert len(det_boxes) == num_of_images, \
            "incorrect detections or ground truths"

        # index of neg, pos and part face, used as their image names
        n_idx = 0
        p_idx = 0
        d_idx = 0
        image_done = 0
        for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            if image_done % 100 == 0:
                print("%d images done" % image_done)
            image_done += 1

            # the previous stage produced no proposal for this image
            if dets.shape[0] == 0:
                continue

            img = cv2.imread(im_idx)
            if img is None:
                # cv2.imread signals an unreadable/missing file with None;
                # skip it instead of crashing on img.shape below.
                print("skipping unreadable image: %s" % im_idx)
                continue

            # squared boxes still carry a fifth column, which is unpacked
            # and discarded below
            dets = convert_to_square(dets)
            dets[:, 0:4] = np.round(dets[:, 0:4])

            for box in dets:
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # ignore box that is too small or beyond image border
                if (width < 20 or x_left < 0 or y_top < 0
                        or x_right > img.shape[1] - 1
                        or y_bottom > img.shape[0] - 1):
                    continue

                # IoU between current box and all gt boxes
                # NOTE(review): np.max raises on an empty IoU result --
                # confirm every annotation line has at least one box.
                Iou = IoU(box, gts)
                max_iou = np.max(Iou)
                # crop from the original image and resize to the net input
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    # negative: IoU with all gts is below 0.3
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write("%s/negative/%s" % (image_size, n_idx) + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                elif max_iou >= 0.4:
                    # regress towards the gt box closest to this proposal
                    x1, y1, x2, y2 = gts[np.argmax(Iou)]

                    # bbox regression label, relative to the squared box
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)
                    offsets = (offset_x1, offset_y1, offset_x2, offset_y2)

                    if max_iou >= 0.65:
                        save_file = os.path.join(pos_save_dir,
                                                 "%s.jpg" % p_idx)
                        f1.write("%s/positive/%s" % (image_size, p_idx) +
                                 ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    else:
                        save_file = os.path.join(part_save_dir,
                                                 "%s.jpg" % d_idx)
                        f3.write("%s/part/%s" % (image_size, d_idx) +
                                 ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1
def save_hard_example(data, test_mode, save_path):
    """Split a model's detections by IoU into hard examples for the next net.

    Each detection is squared (the next network takes square input),
    cropped, resized to ``image_size`` x ``image_size`` and classified by
    the highest IoU of the *un-squared* detection against the ground-truth
    boxes of the image:

    * IoU < 0.3   -> negative (label ``0``)
    * IoU >= 0.65 -> positive (label ``1``) plus four bbox regression offsets
    * IoU >= 0.4  -> part sample (label ``-1``) plus four bbox regression
      offsets

    Relies on names defined outside this function: ``image_size``,
    ``neg_dir``, ``pos_dir``, ``part_dir``, ``path_config``, ``IoU`` and
    ``convert_to_square``.

    :param data: mapping with ``'images'`` (image paths) and ``'bboxes'``
        (ground-truth boxes per image)
    :param test_mode: network that produced the detections, 'PNet' or 'RNet'
    :param save_path: directory that contains ``detections.pkl``
    :raises ValueError: if ``test_mode`` is neither 'PNet' nor 'RNet'
    :raises AssertionError: if the number of detection entries differs from
        the number of images
    :return: None
    """
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)
    print("共需处理图片数:", num_of_images)

    # label files of the network that will be trained on these samples
    if test_mode == 'PNet':
        pos_label_file = path_config.rnet_pos_txt_path
        part_label_file = path_config.rnet_part_txt_path
        neg_label_file = path_config.rnet_neg_txt_path
    elif test_mode == 'RNet':
        pos_label_file = path_config.onet_pos_txt_path
        part_label_file = path_config.onet_part_txt_path
        neg_label_file = path_config.onet_neg_txt_path
    else:
        raise ValueError('网络类型(--test_mode)错误!')

    # Context managers guarantee the label files are flushed and closed even
    # when processing raises part-way through.
    with open(pos_label_file, 'w') as pos_file, \
            open(part_label_file, 'w') as part_file, \
            open(neg_label_file, 'w') as neg_file:
        # read the pickled detection results
        # NOTE: pickle.load can execute arbitrary code; only point save_path
        # at detection files produced by a trusted run.
        with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_file:
            det_boxes = pickle.load(det_file)
        assert len(det_boxes) == num_of_images, \
            "incorrect detections or ground truths"

        # running counts of negative / positive / part samples, also used
        # as file names
        n_idx = 0
        p_idx = 0
        d_idx = 0
        image_done = 0  # images processed so far
        no_pos_image_num = 0  # images that yielded no positive sample
        old_p_idx = -1  # positive count as of the previous image
        for im_idx, actual_detections, gts in zip(im_idx_list, det_boxes,
                                                  gt_boxes_list):
            gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
            # an unchanged positive count means the previous image produced
            # no positive sample
            if old_p_idx == p_idx:
                no_pos_image_num += 1
            else:
                old_p_idx = p_idx
            if (image_done + 1) % 100 == 0:
                print("生成进度:{}/{}".format(image_done + 1, num_of_images))
                print("neg:{}, pos:{}, part:{}, no pos image:{}".format(
                    n_idx, p_idx, d_idx, no_pos_image_num))
            image_done += 1

            if actual_detections.shape[0] == 0:
                continue

            img = cv2.imread(im_idx)
            if img is None:
                # cv2.imread signals an unreadable/missing file with None;
                # skip it instead of crashing on img.shape below.
                print("skipping unreadable image: %s" % im_idx)
                continue

            # square the detections because the next network takes square
            # input
            squared_detections = convert_to_square(actual_detections)
            squared_detections[:, 0:4] = np.round(squared_detections[:, 0:4])
            for index, box in enumerate(squared_detections):
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # ignore boxes that are too small or cross the image border
                if (width < 20 or x_left < 0 or y_top < 0
                        or x_right > img.shape[1] - 1
                        or y_bottom > img.shape[0] - 1):
                    continue

                # IoU uses the actual detection, while the crop uses the
                # squared region
                # NOTE(review): np.max raises on an empty IoU result --
                # confirm every image has at least one ground-truth box.
                iou = IoU(actual_detections[index], gts)
                max_iou = np.max(iou)
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    # negative sample
                    save_file = os.path.join(neg_dir, "%s.jpg" % n_idx)
                    neg_file.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                elif max_iou >= 0.4:
                    # regress towards the gt box with the highest IoU
                    x1, y1, x2, y2 = gts[np.argmax(iou)]

                    # bbox regression targets, relative to the squared box
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)
                    offsets = (offset_x1, offset_y1, offset_x2, offset_y2)

                    if max_iou >= 0.65:
                        save_file = os.path.join(pos_dir, "%s.jpg" % p_idx)
                        pos_file.write(
                            save_file + ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    else:
                        save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                        part_file.write(
                            save_file + ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1
# Beispiel #4 (Example #4)
# 0
def generate_landmark_data(landmark_truth_txt_path,
                           images_dir,
                           net,
                           argument=False):
    """Generate landmark training samples (label ``-2``) for one network.

    For every record returned by ``get_landmark_data`` the squared
    ground-truth box is cropped, resized to the network input size and its
    five landmarks are normalised to that box.  With ``argument`` enabled,
    up to 10 randomly shifted crops whose ``square_IoU`` with the ground
    truth exceeds 0.65 are added (at most 100 failed attempts), each
    optionally followed by flipped and +/-5 rotated variants.  Samples with
    any normalised landmark coordinate outside [0, 1] are not written.

    NOTE: with ``argument`` enabled, an image whose ground-truth box is
    smaller than 20 px on its longer side, or has a negative origin, is
    skipped entirely -- including its un-augmented crop.

    Relies on names defined outside this function: ``path_config``,
    ``get_landmark_data``, ``convert_to_square``, ``square_IoU``, ``BBox``,
    ``flip`` and ``rotate``.

    :param landmark_truth_txt_path: txt file listing image path, bounding
        box and landmarks
    :param images_dir: directory that holds the images
    :param net: network type, one of 'PNet', 'RNet', 'ONet'
    :param argument: whether to apply data augmentation
    :raises ValueError: if ``net`` is not one of the supported types
    :return: None; crops are written to the landmark directory and one
        label line per crop to the landmark txt file
    """
    if net == "PNet":
        size = 12
        landmark_dir = path_config.pnet_landmark_dir
        net_data_root_dir = path_config.pnet_dir
        landmark_txt_path = path_config.pnet_landmark_txt_path
    elif net == "RNet":
        size = 24
        landmark_dir = path_config.rnet_landmark_dir
        net_data_root_dir = path_config.rnet_dir
        landmark_txt_path = path_config.rnet_landmark_txt_path
    elif net == "ONet":
        size = 48
        landmark_dir = path_config.onet_landmark_dir
        net_data_root_dir = path_config.onet_dir
        landmark_txt_path = path_config.onet_landmark_txt_path
    else:
        raise ValueError('网络类型(--net)错误!')

    # Create the output directories before opening the label file, so a
    # fresh run does not fail if the label file lives under one of them.
    if not os.path.exists(net_data_root_dir):
        os.mkdir(net_data_root_dir)
    if not os.path.exists(landmark_dir):
        os.mkdir(landmark_dir)

    # records of (image path, bounding box, landmarks)
    data = get_landmark_data(landmark_truth_txt_path, images_dir)

    landmark_idx = 0  # images visited in augmentation mode (progress only)
    image_id = 0  # running index used as output file name
    # The context manager closes the label file even if an image fails.
    with open(landmark_txt_path, 'w') as landmark_file:
        for (imgPath, bbox, landmarkGt) in data:
            # crops of this image and their normalised landmark vectors
            cropped_images = []
            cropped_landmarks = []

            img = cv2.imread(imgPath)
            assert img is not None, "cannot read image: %s" % imgPath
            image_height, image_width, _ = img.shape

            gt_box = np.array([[bbox.left, bbox.top, bbox.right, bbox.bottom]])
            square_gt_box = np.squeeze(convert_to_square(gt_box))
            # keep the box inside the image while preserving its size
            if square_gt_box[0] < 0:
                square_gt_box[2] -= square_gt_box[0]
                square_gt_box[0] = 0
            if square_gt_box[1] < 0:
                square_gt_box[3] -= square_gt_box[1]
                square_gt_box[1] = 0
            if square_gt_box[2] > image_width:
                square_gt_box[0] -= (square_gt_box[2] - image_width)
                square_gt_box[2] = image_width
            if square_gt_box[3] > image_height:
                square_gt_box[1] -= (square_gt_box[3] - image_height)
                square_gt_box[3] = image_height

            gt_box = np.squeeze(gt_box)
            # landmark coordinates normalised to the squared box:
            # ((x - left) / box width, (y - top) / box height)
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = ((one[0] - square_gt_box[0]) /
                                   (square_gt_box[2] - square_gt_box[0]),
                                   (one[1] - square_gt_box[1]) /
                                   (square_gt_box[3] - square_gt_box[1]))
            cropped_landmarks.append(landmark.reshape(10))

            # crop the target region
            cropped_object_image = img[square_gt_box[1]:square_gt_box[3] + 1,
                                       square_gt_box[0]:square_gt_box[2] + 1]
            cropped_object_image = cv2.resize(cropped_object_image,
                                              (size, size))
            cropped_images.append(cropped_object_image)

            if argument:
                landmark_idx = landmark_idx + 1
                if landmark_idx % 100 == 0:
                    sys.stdout.write("\r{}/{} images done ...".format(
                        landmark_idx, len(data)))

                # ground-truth coordinates, width and height
                x_truth_left, y_truth_top, x_truth_right, y_truth_bottom = \
                    gt_box
                width_truth = x_truth_right - x_truth_left + 1
                height_truth = y_truth_bottom - y_truth_top + 1
                if (max(width_truth, height_truth) < 20
                        or x_truth_left < 0 or y_truth_top < 0):
                    continue

                # random shifts: stop after 10 accepted crops or 100 misses
                shift_num = 0
                shift_try = 0
                while shift_num < 10 and shift_try < 100:
                    # np.ceil returns a float; pass explicit ints as bounds
                    bbox_size = npr.randint(
                        int(min(width_truth, height_truth) * 0.8),
                        int(np.ceil(1.25 * max(width_truth, height_truth))))
                    delta_x = npr.randint(int(-width_truth * 0.2),
                                          int(np.ceil(width_truth * 0.2)))
                    delta_y = npr.randint(int(-height_truth * 0.2),
                                          int(np.ceil(height_truth * 0.2)))
                    x_left_shift = int(
                        max(
                            x_truth_left + width_truth / 2 - bbox_size / 2 +
                            delta_x, 0))
                    y_top_shift = int(
                        max(
                            y_truth_top + height_truth / 2 - bbox_size / 2 +
                            delta_y, 0))
                    x_right_shift = x_left_shift + bbox_size
                    y_bottom_shift = y_top_shift + bbox_size
                    if (x_right_shift > image_width
                            or y_bottom_shift > image_height):
                        shift_try += 1
                        continue
                    crop_box = np.array([
                        x_left_shift, y_top_shift, x_right_shift,
                        y_bottom_shift
                    ])
                    # square-corrected IoU between the shifted region and
                    # the ground truth
                    iou = square_IoU(crop_box, np.expand_dims(gt_box, 0))
                    if not iou > 0.65:
                        shift_try += 1
                        continue

                    shift_num += 1
                    cropped_im = img[y_top_shift:y_bottom_shift + 1,
                                     x_left_shift:x_right_shift + 1, :]
                    resized_im = cv2.resize(cropped_im, (size, size))
                    cropped_images.append(resized_im)
                    # landmarks normalised to the shifted crop
                    shifted_landmark = np.zeros((5, 2))
                    for index, one in enumerate(landmarkGt):
                        shifted_landmark[index] = (
                            (one[0] - x_left_shift) / bbox_size,
                            (one[1] - y_top_shift) / bbox_size)
                    cropped_landmarks.append(shifted_landmark.reshape(10))

                    # further augmentation based on the shifted crop; a
                    # separate name avoids shadowing the loop's ``bbox``
                    landmark_ = shifted_landmark
                    shifted_bbox = BBox([
                        x_left_shift, y_top_shift, x_right_shift,
                        y_bottom_shift
                    ])
                    # flip of the shifted crop
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(
                            resized_im, landmark_)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        cropped_images.append(face_flipped)
                        cropped_landmarks.append(landmark_flipped.reshape(10))

                    # rotation by +5 then -5 (the original comments call
                    # these clockwise and counter-clockwise), each drawn
                    # independently and followed by a flip of the rotated
                    # face
                    for alpha in (5, -5):
                        if random.choice([0, 1]) > 0:
                            face_rotated_by_alpha, landmark_rotated = rotate(
                                img, shifted_bbox,
                                shifted_bbox.reprojectLandmark(landmark_),
                                alpha)
                            landmark_rotated = shifted_bbox.projectLandmark(
                                landmark_rotated)
                            face_rotated_by_alpha = cv2.resize(
                                face_rotated_by_alpha, (size, size))
                            cropped_images.append(face_rotated_by_alpha)
                            cropped_landmarks.append(
                                landmark_rotated.reshape(10))
                            face_flipped, landmark_flipped = flip(
                                face_rotated_by_alpha, landmark_rotated)
                            face_flipped = cv2.resize(face_flipped,
                                                      (size, size))
                            cropped_images.append(face_flipped)
                            cropped_landmarks.append(
                                landmark_flipped.reshape(10))

            # save the landmark training crops and their coordinates
            cropped_images, cropped_landmarks = np.asarray(
                cropped_images), np.asarray(cropped_landmarks)
            for i in range(len(cropped_images)):
                # drop samples whose landmarks fall outside the crop
                if np.any(cropped_landmarks[i] < 0):
                    continue
                if np.any(cropped_landmarks[i] > 1):
                    continue

                save_file = os.path.join(landmark_dir, "%d.jpg" % image_id)
                cv2.imwrite(save_file, cropped_images[i])
                landmark_file.write(
                    save_file + " -2 " +
                    " ".join(map(str, cropped_landmarks[i])) + "\n")
                image_id = image_id + 1