def save_hard_example(data, test_mode, save_path):
    """Split detector output into neg/part/pos crops used to train the next network.

    Each detection is converted to a square box, cropped from its source image,
    resized to ``image_size`` x ``image_size`` and classified by the largest IoU
    between the (un-squared) detection and the image's ground-truth boxes:

    * max IoU < 0.3   -> negative sample, label ``0``
    * max IoU >= 0.65 -> positive sample, label ``1`` plus four bbox offsets
    * max IoU >= 0.4  -> part sample, label ``-1`` plus four bbox offsets
    * 0.3 <= max IoU < 0.4 -> the box is discarded

    Relies on the module-level names ``path_config``, ``image_size``,
    ``neg_dir``, ``pos_dir`` and ``part_dir``.

    :param data: dict with ``'images'`` (image paths passed to ``cv2.imread``)
        and ``'bboxes'`` (ground-truth boxes per image, reshaped to ``(-1, 4)``)
    :param test_mode: network that produced the detections, ``'PNet'`` or
        ``'RNet'``; selects the label files of the following stage
    :param save_path: directory that contains ``detections.pkl``
    :return: None
    :raises ValueError: if ``test_mode`` is neither ``'PNet'`` nor ``'RNet'``
    """
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)
    print("共需处理图片数:", num_of_images)

    # Label files belong to the following stage: PNet detections are written to
    # the rnet_* files, RNet detections to the onet_* files.
    if test_mode == 'PNet':
        pos_label_file = path_config.rnet_pos_txt_path
        part_label_file = path_config.rnet_part_txt_path
        neg_label_file = path_config.rnet_neg_txt_path
    elif test_mode == 'RNet':
        pos_label_file = path_config.onet_pos_txt_path
        part_label_file = path_config.onet_part_txt_path
        neg_label_file = path_config.onet_neg_txt_path
    else:
        raise ValueError('网络类型(--test_mode)错误!')

    # Load and validate the detections before the label files are opened for
    # writing, so a missing or mismatched pickle does not truncate them.
    # NOTE: pickle can execute arbitrary code; only load files you produced.
    with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_file:
        det_boxes = pickle.load(det_file)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # Running counters of negative / positive / part samples; each one is also
    # used as the file name of the next saved crop.
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0  # images processed so far
    no_pos_image_num = 0  # images seen so far that produced no positive sample
    old_p_idx = -1  # positive count recorded at an earlier image

    # Context managers guarantee the label files are closed even if a crop fails.
    with open(pos_label_file, 'w') as pos_file, \
            open(part_label_file, 'w') as part_file, \
            open(neg_label_file, 'w') as neg_file:
        for im_idx, actual_detections, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
            # An unchanged positive count means the previous image added no positives.
            if old_p_idx == p_idx:
                no_pos_image_num += 1
            else:
                old_p_idx = p_idx
            if (image_done + 1) % 100 == 0:
                print("生成进度:{}/{}".format(image_done + 1, num_of_images))
                print("neg:{}, pos:{}, part:{}, no pos image:{}".format(n_idx, p_idx, d_idx, no_pos_image_num))
            image_done += 1

            if actual_detections.shape[0] == 0:
                continue

            img = cv2.imread(im_idx)
            # The next network takes square input, so crop square boxes.
            squared_detections = convert_to_square(actual_detections)
            squared_detections[:, 0:4] = np.round(squared_detections[:, 0:4])
            for index, box in enumerate(squared_detections):
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # Ignore boxes that are too small or reach outside the image.
                if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
                    continue

                # IoU is measured on the original detection, while the crop
                # that gets saved is the squared region.
                iou = IoU(actual_detections[index], gts)
                max_iou = np.max(iou)
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    # Negative sample: image plus a label line ending in " 0".
                    save_file = os.path.join(neg_dir, "%s.jpg" % n_idx)
                    neg_file.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    continue

                # Positive / part candidates regress towards the best-matching
                # ground-truth box.
                assigned_gt = gts[np.argmax(iou)]
                x1, y1, x2, y2 = assigned_gt

                # Bounding-box regression targets, normalised by the squared box size.
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                if max_iou >= 0.65:
                    save_file = os.path.join(pos_dir, "%s.jpg" % p_idx)
                    pos_file.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                    part_file.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
Ejemplo n.º 2
0
    neg_num = 0
    # Keep drawing random square crops until 50 of them have been saved as
    # negative samples.
    # NOTE(review): the loop only advances when a crop is accepted, so it relies
    # on enough low-IoU crops being available.
    while neg_num < 50:
        # Side length of the crop: randint is called with lower bound 12 and
        # upper bound min(width, height) / 2.
        # NOTE(review): the upper bound is a float under Python 3 division.
        size = npr.randint(12, min(width, height) / 2)
        # Top-left corner, chosen so the crop stays inside width x height.
        nx = npr.randint(0, width - size)
        ny = npr.randint(0, height - size)
        # Crop box as [x1, y1, x2, y2].
        crop_box = np.array([nx, ny, nx + size, ny + size])
        # IoU of the crop against `boxes`.
        Iou = IoU(crop_box, boxes)

        # Cut the crop out of the image.
        cropped_im = img[ny:ny + size, nx:nx + size, :]
        # Resize the crop to 12x12.
        resized_im = cv2.resize(cropped_im, (12, 12),
                                interpolation=cv2.INTER_LINEAR)

        if np.max(Iou) < 0.3:
            # Accepted only when the largest IoU is below 0.3; the label line
            # ends in " 0".
            save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
            f2.write("../../DATA/12/negative/%s.jpg" % n_idx + ' 0\n')
            cv2.imwrite(save_file, resized_im)
            n_idx += 1
            neg_num += 1
def GenerateData(ftxt,
                 data_path,
                 output_path,
                 img_output_path,
                 net,
                 argument=False):
    '''
    Generate landmark training crops and their label file for one network.

    Parameters
    ------------

        ftxt: path of the annotation file
        data_path: directory that holds the data set
        output_path: directory the label text file is written to
        img_output_path: directory the cropped images are written to
        net: String, one of "PNet", "RNet", "ONet" (crop size 12 / 24 / 48)
        argument: whether to apply data augmentation; samples are only
            written to disk when this is true

    Returns
    -------------
        The images and landmarks collected for the last annotation entry
        (two empty lists when there are no entries), or None when `net` is
        not a known network name.
    '''
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print('Net type error')
        return
    image_id = 0
    idx = 0
    # Defined up front so the final return works even when `data` is empty.
    F_imgs = []
    F_landmarks = []
    # The context manager closes the label file even if processing fails midway.
    with open(join(output_path, "landmark_%s_aug.txt" % (size)), 'w') as f:
        # (image path, bounding box, landmarks) entries read from `ftxt`.
        data = getDataFromTxt(ftxt, data_path=data_path)
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)

            assert (img is not None)
            img_h, img_w, _ = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])
            # Ground-truth face crop, resized to the network input size.
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # Landmarks relative to the ground-truth box:
            # (x - left) / (right - left), (y - top) / (bottom - top).
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = ((one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                                   (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))

            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))  # [x1, y1, x2, y2, ...]

            if not argument:
                continue

            # ---- data augmentation ----
            idx = idx + 1
            if idx % 100 == 0:
                print(idx, "images done")
            x1, y1, x2, y2 = gt_box
            gt_w = x2 - x1 + 1
            gt_h = y2 - y1 + 1
            # Skip small faces and boxes starting outside the image; nothing
            # is written for such an entry.
            if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                continue

            # Ten random shifts / rescales of the ground-truth box.
            for _ in range(10):
                # Bounds are converted to int explicitly instead of handing
                # floats to randint.
                bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                        int(np.ceil(1.25 * max(gt_w, gt_h))))
                delta_x = npr.randint(int(-gt_w * 0.2), int(gt_w * 0.2))
                delta_y = npr.randint(int(-gt_h * 0.2), int(gt_h * 0.2))
                nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))

                nx2 = nx1 + bbox_size
                ny2 = ny1 + bbox_size
                if nx2 > img_w or ny2 > img_h:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])

                cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                resized_im = cv2.resize(cropped_im, (size, size))
                iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                if not (iou > 0.65):
                    continue

                F_imgs.append(resized_im)
                # A fresh array per sample: reshape() may return a view, so
                # reusing one buffer would overwrite stored landmarks.
                landmark = np.zeros((5, 2))
                for index, one in enumerate(landmarkGt):
                    landmark[index] = ((one[0] - nx1) / bbox_size,
                                       (one[1] - ny1) / bbox_size)
                F_landmarks.append(landmark.reshape(10))
                landmark_ = F_landmarks[-1].reshape(-1, 2)
                bbox = BBox([nx1, ny1, nx2, ny2])

                # Mirror, applied with probability 1/2.
                if random.choice([0, 1]) > 0:
                    face_flipped, landmark_flipped = flip(resized_im, landmark_)
                    face_flipped = cv2.resize(face_flipped, (size, size))
                    F_imgs.append(face_flipped)
                    F_landmarks.append(landmark_flipped.reshape(10))

                # Rotation by +5 and then -5, each applied with probability
                # 1/2; every rotated sample is stored together with its mirror.
                # NOTE(review): the rotation direction for each sign depends on
                # rotate(), which is not visible here.
                for alpha in (5, -5):
                    if random.choice([0, 1]) > 0:
                        face_rotated_by_alpha, landmark_rotated = rotate(
                            img, bbox, bbox.reprojectLandmark(landmark_), alpha)
                        landmark_rotated = bbox.projectLandmark(landmark_rotated)
                        face_rotated_by_alpha = cv2.resize(
                            face_rotated_by_alpha, (size, size))
                        F_imgs.append(face_rotated_by_alpha)
                        F_landmarks.append(landmark_rotated.reshape(10))

                        face_flipped, landmark_flipped = flip(
                            face_rotated_by_alpha, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for face, face_landmarks in zip(F_imgs, F_landmarks):
                # Drop the sample if any coordinate is <= 0 or >= 1.
                if np.any(face_landmarks <= 0) or np.any(face_landmarks >= 1):
                    continue

                # The same path is used for the image and its label line
                # (the label previously used the undefined name `dstdir`).
                out_file = join(img_output_path, "%d.jpg" % (image_id))
                cv2.imwrite(out_file, face)
                landmarks = map(str, list(face_landmarks))
                f.write(out_file + " -2 " + " ".join(landmarks) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks
Ejemplo n.º 4
0
def GenerateData(ftxt, data_path, net, argument=False):
    '''
    Generate landmark training crops and their label file for one network.

    The label file is written under the module-level ``OUTPUT`` directory and
    the images under the module-level ``dstdir`` directory.

    :param ftxt: name/path of the text file that contains image path,
                bounding box, and landmarks
    :param data_path: data set directory, forwarded to ``getDataFromTxt``
    :param net: one of the nets in the cascaded networks
                ("PNet", "RNet" or "ONet" -> crop size 12 / 24 / 48)
    :param argument: apply augmentation or not; samples are only written to
                disk when this is true
    :return: images and related landmarks of the last annotation entry
             (two empty lists when there are no entries), or None when
             ``net`` is not a known network name
    '''
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print('Net type error')
        return
    image_id = 0
    idx = 0
    # Defined up front so the final return works even when `data` is empty.
    F_imgs = []
    F_landmarks = []
    # The context manager closes the label file even if processing fails midway.
    with open(join(OUTPUT, "landmark_%s_aug.txt" % (size)), 'w') as f:
        # Entries of (image path, bounding box, landmarks) read from `ftxt`.
        data = getDataFromTxt(ftxt, data_path=data_path)
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)

            assert (img is not None)
            img_h, img_w, _ = img.shape
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])
            # Ground-truth face crop, resized to the network input size.
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # Landmarks relative to the ground-truth box:
            # (x - left) / (right - left), (y - top) / (bottom - top).
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = ((one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                                   (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))

            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if not argument:
                continue

            # ---- data augmentation ----
            idx = idx + 1
            if idx % 100 == 0:
                print(idx, "images done")
            x1, y1, x2, y2 = gt_box
            gt_w = x2 - x1 + 1
            gt_h = y2 - y1 + 1
            # Skip small faces and boxes starting outside the image; nothing
            # is written for such an entry.
            if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                continue

            # Ten random shifts / rescales of the ground-truth box.
            for _ in range(10):
                # Bounds are converted to int explicitly instead of handing
                # floats to randint.
                bbox_size = npr.randint(int(min(gt_w, gt_h) * 0.8),
                                        int(np.ceil(1.25 * max(gt_w, gt_h))))
                delta_x = npr.randint(int(-gt_w * 0.2), int(gt_w * 0.2))
                delta_y = npr.randint(int(-gt_h * 0.2), int(gt_h * 0.2))
                nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))

                nx2 = nx1 + bbox_size
                ny2 = ny1 + bbox_size
                if nx2 > img_w or ny2 > img_h:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])

                cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                resized_im = cv2.resize(cropped_im, (size, size))
                iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                if not (iou > 0.65):
                    continue

                F_imgs.append(resized_im)
                # A fresh array per sample: reshape() may return a view, so
                # reusing one buffer would overwrite stored landmarks.
                landmark = np.zeros((5, 2))
                for index, one in enumerate(landmarkGt):
                    landmark[index] = ((one[0] - nx1) / bbox_size,
                                       (one[1] - ny1) / bbox_size)
                F_landmarks.append(landmark.reshape(10))
                landmark_ = F_landmarks[-1].reshape(-1, 2)
                bbox = BBox([nx1, ny1, nx2, ny2])

                # Mirror, applied with probability 1/2.
                if random.choice([0, 1]) > 0:
                    face_flipped, landmark_flipped = flip(resized_im, landmark_)
                    face_flipped = cv2.resize(face_flipped, (size, size))
                    F_imgs.append(face_flipped)
                    F_landmarks.append(landmark_flipped.reshape(10))

                # Rotation by +5 and then -5, each applied with probability
                # 1/2; every rotated sample is stored together with its mirror.
                # NOTE(review): the rotation direction for each sign depends on
                # rotate(), which is not visible here.
                for alpha in (5, -5):
                    if random.choice([0, 1]) > 0:
                        face_rotated_by_alpha, landmark_rotated = rotate(
                            img, bbox, bbox.reprojectLandmark(landmark_), alpha)
                        landmark_rotated = bbox.projectLandmark(landmark_rotated)
                        face_rotated_by_alpha = cv2.resize(
                            face_rotated_by_alpha, (size, size))
                        F_imgs.append(face_rotated_by_alpha)
                        F_landmarks.append(landmark_rotated.reshape(10))

                        face_flipped, landmark_flipped = flip(
                            face_rotated_by_alpha, landmark_rotated)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
            for face, face_landmarks in zip(F_imgs, F_landmarks):
                # Drop the sample if any coordinate is <= 0 or >= 1.
                if np.any(face_landmarks <= 0) or np.any(face_landmarks >= 1):
                    continue

                out_file = join(dstdir, "%d.jpg" % (image_id))
                cv2.imwrite(out_file, face)
                landmarks = map(str, list(face_landmarks))
                f.write(out_file + " -2 " + " ".join(landmarks) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks
Ejemplo n.º 5
0
    if idx % 100 == 0:
        print(idx, "images done")

    height, width, channel = img.shape  # height / width / channel count of the input image

    neg_num = 0
    # Save 50 negative samples per image. Crops are drawn at random positions,
    # independently of the ground truth, so a crop may not overlap any
    # ground-truth box at all.
    while neg_num < 50:
        # Crop side length: randint with lower bound 12 and upper bound
        # min(width, height) / 2.
        size = npr.randint(12, min(width, height) / 2)
        nx = npr.randint(0, width - size)
        ny = npr.randint(0, height - size)
        # Only the box coordinates (top-left x/y, bottom-right x/y), not pixels yet.
        crop_box = np.array([nx, ny, nx + size, ny + size])

        Iou = IoU(crop_box, boxes)
        # The crop cut out of the original image.
        cropped_im = img[ny:ny + size, nx:nx + size, :]
        # Resize the crop to 12x12 (the comment in the original names P_NET
        # as the consumer).
        resized_im = cv2.resize(cropped_im, (12, 12),
                                interpolation=cv2.INTER_LINEAR)

        # A crop whose largest IoU is below 0.3 is kept as a negative sample.
        if np.max(Iou) < 0.3:
            # IoU with every ground-truth box must be below 0.3.
            save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
            f2.write("12/negative/%s" % n_idx + ' 0\n')
            cv2.imwrite(save_file, resized_im)
            n_idx += 1  # running total of negative samples
            neg_num += 1  # negatives saved for the current image (target: 50)
Ejemplo n.º 6
0
def save_hard_example(net):
    """Build neg/pos/part training crops for ``net`` from saved detections.

    Reads the annotation file (one line per image: image path followed by
    ``x1 y1 x2 y2`` for each ground-truth box) and ``detections.pkl`` from the
    ``prepare_data/<net>`` directory. Every detection box is squared, cropped,
    resized to the network input size and classified by its largest IoU with
    the image's ground-truth boxes:

    * max IoU < 0.3   -> negative sample, label ``0``
    * max IoU >= 0.65 -> positive sample, label ``1`` plus four bbox offsets
    * max IoU >= 0.4  -> part sample, label ``-1`` plus four bbox offsets
    * 0.3 <= max IoU < 0.4 -> the box is discarded

    :param net: ``"rnet"`` (24x24 crops) or ``"onet"`` (48x48 crops)
    :return: None
    :raises ValueError: if ``net`` is not one of the supported names
    """
    # Validate first: an unsupported name previously left image_size unbound
    # and only failed later, after the annotation file had been read.
    if net == "rnet":
        image_size = 24
    elif net == "onet":
        image_size = 48
    else:
        raise ValueError("net must be 'rnet' or 'onet', got %r" % (net,))

    # Crop directories follow image_size so they agree with the
    # "<image_size>/<kind>/<idx>" entries written to the label files
    # (they used to be fixed to the 24 directories even for "onet").
    neg_save_dir = "/Users/qiuxiaocong/Downloads/mtcnn1/images/%d/negative" % image_size
    pos_save_dir = "/Users/qiuxiaocong/Downloads/mtcnn1/images/%d/positive" % image_size
    part_save_dir = "/Users/qiuxiaocong/Downloads/mtcnn1/images/%d/part" % image_size

    # One annotation file for the whole training set.
    anno_file = '/Users/qiuxiaocong/Downloads/mtcnn1/imglists/anno_demo.txt'

    im_idx_list = list()  # image path of every annotated image
    gt_boxes_list = list()  # ground-truth boxes of every image, shape (-1, 4)
    with open(anno_file, 'r') as f:
        for annotation in f:
            annotation = annotation.strip().split(' ')
            im_idx_list.append(annotation[0])
            boxes = list(map(float, annotation[1:]))
            gt_boxes_list.append(np.array(boxes, dtype=np.float32).reshape(-1, 4))

    num_of_images = len(im_idx_list)
    print("processing %d images in total" % num_of_images)

    # Directory holding detections.pkl and receiving the label files.
    save_path = "/Users/qiuxiaocong/Downloads/mtcnn1/prepare_data/%s" % net

    # Load and validate the detections before the label files are opened for
    # writing, so a missing or mismatched pickle does not truncate them.
    # NOTE: pickle can execute arbitrary code; only load files you produced.
    with open(os.path.join(save_path, 'detections.pkl'), 'rb') as det_file:
        det_boxes = pickle.load(det_file)
    assert len(
        det_boxes) == num_of_images, "incorrect detections or ground truths"

    # Index of neg, pos and part samples, used as their image names.
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0

    # Context managers guarantee the label files are closed on any error.
    with open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w') as f1, \
            open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w') as f2, \
            open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w') as f3:
        for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
            if image_done % 100 == 0:
                print("%d images done" % image_done)
            image_done += 1

            # No detections were produced for this image.
            if dets.shape[0] == 0:
                continue

            img = cv2.imread(im_idx)
            dets = convert_to_square(dets)
            dets[:, 0:4] = np.round(dets[:, 0:4])

            for box in dets:
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # Ignore boxes that are too small or beyond the image border.
                if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[
                        1] - 1 or y_bottom > img.shape[0] - 1:
                    continue

                # IoU between the current (squared) box and all gt boxes.
                Iou = IoU(box, gts)
                max_iou = np.max(Iou)
                # Crop from the original image and resize to the input size.
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    # IoU with every gt box is below 0.3 -> negative sample.
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write("%s/negative/%s" % (image_size, n_idx) + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    continue

                # The gt box with the highest IoU is the regression target.
                assigned_gt = gts[np.argmax(Iou)]
                x1, y1, x2, y2 = assigned_gt

                # Bounding-box regression labels, normalised by the box size.
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                if max_iou >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write("%s/positive/%s" % (image_size, p_idx) +
                             ' 1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write("%s/part/%s" % (image_size, d_idx) +
                             ' -1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
Ejemplo n.º 7
0
    def test(self, threshold, model_iter):
        """
        Run the detector over the test set and summarise it at one IoU threshold.

        An image counts as a hit when at least one of its detected boxes has
        IoU > ``threshold`` with the image's ground-truth box.

        :param threshold: IoU threshold above which a detected box counts as correct
        :param model_iter: model iteration tag; only referenced by the disabled
            drawing code kept below as a string literal
        :return: tuple ``(precision, acc_pos, acc_all, recall)``:
            ``precision`` is the mean over hit images of the fraction of boxes
            above the threshold; ``acc_pos`` is the mean over hit images of the
            average of column 4 (presumably the confidence score) of the boxes
            above the threshold; ``acc_all`` is the same average over all boxes;
            ``recall`` is the number of hit images divided by
            ``self.test_loader.size``. The first three are 0 when no image is a hit.
        """
        all_boxes, landmark = self.detector.detect_images(self.test_loader)

        hard_samples = list()  # images with detections but none above the threshold
        recall = 0  # counts hit images here; normalised to TP / (TP + FN) at the end
        acc_pos = 0
        acc_all = 0
        precision = 0  # precision: TP / (TP + FP), accumulated per hit image
        save_path = os.path.join(os.path.dirname(self.images_path[0]), '..', 'result')
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        for index, image_path in enumerate(self.images_path):
            image_name = os.path.basename(image_path)
            ground_truth = self.ground_map[image_name]
            if len(all_boxes[index]) == 0:
                print('图片{}检测不到车牌'.format(image_name))
                continue
            # IoU of every detected box against the single ground-truth box.
            iou = np.ones((len(all_boxes[index]),))
            gt_boxes = np.array([ground_truth])
            for j, box in enumerate(all_boxes[index]):
                iou[j] = IoU(box, gt_boxes)
            # Disabled drawing code, kept as a no-op string literal.
            '''
            # 画图
            im = cv2.imread(image_path)
            for j, box in enumerate(all_boxes[index]):
                # if image_name == '20180929172716720_23609_dqp001_甘A5T470.jpg':
                #    pdb.set_trace()
                # 绘制iou大于阈值的pos框
                if iou[j] > threshold:
                    cv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
                                  (255, 0, 0), 2)
                    cv2.putText(im, '{:s}|{:.2f}|{:.2f}'.format('p', box[4], iou[j]),
                                (int(box[0]), int(box[1])), cv2.FONT_HERSHEY_SIMPLEX,
                                1, (255, 0, 0))
                    for k in range(5):
                        cv2.circle(im, (landmark[index][j][2*k], landmark[index][j][2*k+1]), 1, (0, 0, 255), 4)
                    
            # 绘制ground truth
            cv2.rectangle(im, (int(ground_truth[0]), int(ground_truth[1])),
                          (int(ground_truth[2]), int(ground_truth[3])),
                          (0, 255, 0), 2)
            cv2.imwrite(os.path.join(save_path, os.path.splitext(image_name)[0] + '_' + model_iter + '.jpg'), im)
            
            print('IoU:\n', iou)
            print('average iou = {}'.format(sum(iou) / sum(iou != 0)))
            '''
            # Accumulate the per-image statistics for images with a hit.
            if iou.max() > threshold:
                recall += 1
                acc_pos += np.mean(all_boxes[index][iou > threshold, 4])
                acc_all += np.mean(all_boxes[index][:, 4])
                precision += len(all_boxes[index][iou > threshold, 4]) / len(all_boxes[index][:, 4])
            else:
                hard_samples.append(image_path)

        # Average over hit images; when there is none the sums are all 0 and
        # dividing by the hit count would raise ZeroDivisionError.
        hit_images = recall
        if hit_images:
            precision /= hit_images
            acc_pos /= hit_images
            acc_all /= hit_images
        total_images = self.test_loader.size
        recall = hit_images / total_images if total_images else 0.0
        print('IoU threshold={}:'.format(threshold), 'precision={},'.format(precision),
              ' acc-pos={},'.format(acc_pos), 'acc-all={}'.format(acc_all), 'recall={}'.format(recall))
        return precision, acc_pos, acc_all, recall
                               min_face_size=min_face_size,
                               stride=stride,
                               threshold=thresh)
path = "E:/Document/Datasets/Wider Face/WIDER_val/images"

# Ground-truth boxes as returned by load_wider_face_gt_boxes; the dict keys are
# joined with `path` to obtain the image files handed to the detector.
gt_data = load_wider_face_gt_boxes("wider_face_val_bbx_gt.txt")

gt_imdb = [os.path.join(path, item) for item in gt_data]
test_data = TestLoader(gt_imdb)

all_boxes, landmarks = mtcnn_detector.detect_face(test_data)

count = 0
scores = []
recall_rate = 0

# Loop-invariant, so computed once instead of once per detected box.
rate = len(all_boxes)

# gt_imdb was built from gt_data in iteration order, so zipping the two pairs
# every image path with the key its ground truth is stored under. Looking up
# gt_data by the joined path would miss the entry.
for imagepath, image_key in zip(gt_imdb, gt_data):
    for bbox in all_boxes[count]:

        # Best IoU of this detection over the image's ground-truth boxes.
        # The comparison used to be inverted (`score > iou`), which left the
        # score at 0.
        score = 0
        for gt_boxes in gt_data[image_key]:
            iou = IoU(bbox, gt_boxes)
            if iou > score:
                score = iou

    count = count + 1
Ejemplo n.º 9
0
def save_12net_data(data_dir):
    """Generate the 12x12 training crops (positive / part / negative) for the 12-net.

    Reads "./prepare_data/wider_face_train.txt", where each line is an image
    path (without the ".jpg" suffix) followed by a flat list of box
    coordinates that is reshaped to (-1, 4) as (x1, y1, x2, y2). For every
    image it writes resized 12x12 crops below ``data_dir + "/12"`` and appends
    one line per crop to ``pos_12.txt``, ``neg_12.txt`` or ``part_12.txt``:

    * negative: IoU with every ground-truth box < 0.3, label ``0``
    * part:     0.4 <= IoU < 0.65 with the source box, label ``-1`` + offsets
    * positive: IoU >= 0.65 with the source box, label ``1`` + offsets

    Images that cannot be read, or that are too small to draw a crop size
    from ``[12, min(width, height) // 2)``, are skipped with a message instead
    of aborting the whole run.

    :param data_dir: dataset root; must contain ``WIDER_train/images``.
    :return: None. The results are the files written to disk.
    """
    anno_file = "./prepare_data/wider_face_train.txt"
    im_dir = data_dir + "/WIDER_train/images"
    save_dir = data_dir + "/12"
    pos_save_dir = data_dir + "/12/positive"
    part_save_dir = data_dir + "/12/part"
    neg_save_dir = data_dir + '/12/negative'

    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print("%d pics in total" % num)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # don't care (part faces)
    idx = 0    # images visited so far

    # The context manager guarantees the three label files are flushed and
    # closed even when an image in the middle of the run raises.
    with open(os.path.join(save_dir, 'pos_12.txt'), 'w') as f1, \
            open(os.path.join(save_dir, 'neg_12.txt'), 'w') as f2, \
            open(os.path.join(save_dir, 'part_12.txt'), 'w') as f3:
        for annotation in annotations:
            annotation = annotation.strip().split(' ')
            # image path
            im_path = annotation[0]
            # ground-truth boxes, one row per face
            bbox = list(map(float, annotation[1:]))
            boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)

            img = cv2.imread(os.path.join(im_dir, im_path + '.jpg'))
            idx += 1
            if img is None:
                # cv2.imread signals a missing/corrupt file by returning None.
                print("skip unreadable image: %s" % im_path)
                continue
            height, width = img.shape[:2]

            # Exclusive upper bound for random negative crop sizes. randint
            # needs low < high, so images this small cannot be sampled.
            max_neg_size = min(width, height) // 2
            if max_neg_size <= 12:
                print("skip too small image: %s" % im_path)
                continue

            # Keep cropping random squares until 50 negatives were collected
            # from this image.
            neg_num = 0
            while neg_num < 50:
                # crop side length in [12, min(width, height) // 2)
                size = npr.randint(12, max_neg_size)
                # top-left corner
                nx = npr.randint(0, width - size)
                ny = npr.randint(0, height - size)
                crop_box = np.array([nx, ny, nx + size, ny + size])

                # IoU with all ground truths must be below 0.3
                if np.max(IoU(crop_box, boxes)) < 0.3:
                    cropped_im = img[ny:ny + size, nx:nx + size, :]
                    resized_im = cv2.resize(cropped_im, (12, 12),
                                            interpolation=cv2.INTER_LINEAR)
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(save_dir + "/negative/%s.jpg" % n_idx + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    neg_num += 1

            for box in boxes:
                # box is (x_left, y_top, x_right, y_bottom)
                x1, y1, x2, y2 = box
                w = x2 - x1 + 1
                h = y2 - y1 + 1

                # Ignore small faces and faces whose top-left corner lies
                # outside the image; their annotations may be inaccurate.
                if max(w, h) < 20 or x1 < 0 or y1 < 0:
                    continue

                # Up to 5 extra negatives close to the face (hard negatives).
                for i in range(5):
                    size = npr.randint(12, max_neg_size)
                    # delta_x / delta_y shift (x1, y1). The lower bound keeps
                    # the shifted corner from going far left/above the box and
                    # the upper bound (w / h) keeps the crop near the face.
                    delta_x = npr.randint(int(max(-size, -x1)), int(w))
                    delta_y = npr.randint(int(max(-size, -y1)), int(h))
                    nx1 = int(max(0, x1 + delta_x))
                    ny1 = int(max(0, y1 + delta_y))
                    # skip crops whose bottom-right corner leaves the image
                    if nx1 + size > width or ny1 + size > height:
                        continue
                    crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])

                    if np.max(IoU(crop_box, boxes)) < 0.3:
                        cropped_im = img[ny1:ny1 + size, nx1:nx1 + size, :]
                        resized_im = cv2.resize(cropped_im, (12, 12),
                                                interpolation=cv2.INTER_LINEAR)
                        save_file = os.path.join(neg_save_dir,
                                                 "%s.jpg" % n_idx)
                        f2.write(save_dir + "/negative/%s.jpg" % n_idx +
                                 ' 0\n')
                        cv2.imwrite(save_file, resized_im)
                        n_idx += 1

                # A side shorter than 5 px makes the +-20% shift range below
                # collapse to randint(0, 0), which raises; such a thin box
                # also cannot overlap a square crop enough to give a sample.
                if min(w, h) < 5:
                    continue

                # Positive and part faces: 20 jittered crops around the box.
                for i in range(20):
                    # crop side length in [0.8 * min side, 1.25 * max side)
                    size = npr.randint(int(min(w, h) * 0.8),
                                       int(np.ceil(1.25 * max(w, h))))

                    # shift of the crop centre relative to the box centre
                    delta_x = npr.randint(int(-w * 0.2), int(w * 0.2))
                    delta_y = npr.randint(int(-h * 0.2), int(h * 0.2))
                    # top-left = box centre + shift - half the crop size
                    nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
                    ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
                    nx2 = nx1 + size
                    ny2 = ny1 + size
                    if nx2 > width or ny2 > height:
                        continue
                    crop_box = np.array([nx1, ny1, nx2, ny2])

                    iou = IoU(crop_box, box.reshape(1, -1))
                    if iou < 0.4:
                        # neither positive nor part: nothing to write
                        continue

                    # Regression targets: corner offsets of the ground truth
                    # relative to the crop, normalised by the crop size so
                    # they are unaffected by the resize to 12x12.
                    offset_x1 = (x1 - nx1) / float(size)
                    offset_y1 = (y1 - ny1) / float(size)
                    offset_x2 = (x2 - nx2) / float(size)
                    offset_y2 = (y2 - ny2) / float(size)

                    cropped_im = img[ny1:ny2, nx1:nx2, :]
                    resized_im = cv2.resize(cropped_im, (12, 12),
                                            interpolation=cv2.INTER_LINEAR)

                    if iou >= 0.65:
                        save_file = os.path.join(pos_save_dir,
                                                 "%s.jpg" % p_idx)
                        f1.write(save_dir + "/positive/%s.jpg" % p_idx +
                                 ' 1 %.2f %.2f %.2f %.2f\n' %
                                 (offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    elif iou >= 0.4:
                        save_file = os.path.join(part_save_dir,
                                                 "%s.jpg" % d_idx)
                        f3.write(save_dir + "/part/%s.jpg" % d_idx +
                                 ' -1 %.2f %.2f %.2f %.2f\n' %
                                 (offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1

            # Report once per 100 images (previously repeated for every box).
            if idx % 100 == 0:
                print("%s images done, pos: %s part: %s neg: %s" %
                      (idx, p_idx, d_idx, n_idx))
Ejemplo n.º 10
0
    #print(idx, "images done")

    height, width, channel = img.shape

    neg_num = 0
    #1---->50
    #对每张图片都裁剪50张negative图片作为负样本
    while neg_num < 50:
        size = npr.randint(12, min(width, height) / 2)
        #top_left coordinate
        nx = npr.randint(0, width - size)
        ny = npr.randint(0, height - size)
        #random crop
        crop_box = np.array([nx, ny, nx + size, ny + size])
        #calculate iou
        Iou = IoU(crop_box, gt_boxes_single)

        #crop a part from inital image
        cropped_im = img[ny:ny + size, nx:nx + size, :]
        #resize the cropped image to size 12*12
        resized_im = cv2.resize(cropped_im, (12, 12),
                                interpolation=cv2.INTER_LINEAR)

        if np.max(Iou) < 0.3:
            # Iou with all gts must below 0.3
            save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
            f2.write(neg_save_dir + "/%s.jpg" % n_idx + ' 0\n')
            cv2.imwrite(save_file, resized_im)
            n_idx += 1
            neg_num += 1
Ejemplo n.º 11
0
def generateData_aug(data_dir, net, argument=False):
    """Write landmark training crops (optionally augmented) for one network.

    For every record returned by ``getDataFromTxt`` the ground-truth face is
    cropped, resized to the network input size and its five landmarks are
    normalised by the ground-truth box. With ``argument`` enabled, faces whose
    longer side is at least 40 px additionally get up to 10 randomly shifted
    crops (kept when IoU with the ground truth > 0.65), each optionally
    mirrored and rotated by +5 / -5 (as passed to ``rotate``).

    Samples whose normalised landmarks are not strictly inside (0, 1) are
    dropped. Kept samples are saved as ``<dstdir>/<id>.jpg`` and listed in
    ``landmark_<size>_aug.txt`` with label ``-2`` followed by the 10 landmark
    values.

    :param data_dir: dataset root; crops go to ``data_dir/<size>/...``.
    :param net: "PNet", "RNet" or "ONet" (input size 12, 24 or 48).
    :param argument: enable the random shift / flip / rotate augmentation.
    :return: ``(F_imgs, F_landmarks)`` of the last record processed (empty
        lists when there were no records), or None for an unknown ``net``.
    """
    if net == "PNet":
        size = 12
    elif net == "RNet":
        size = 24
    elif net == "ONet":
        size = 48
    else:
        print("Net type error! ")
        return

    OUTPUT = data_dir + "/%d" % size
    if not exists(OUTPUT):
        os.mkdir(OUTPUT)
    dstdir = data_dir + "/%d/train_%s_landmark_aug" % (size, net)
    if not exists(dstdir):
        os.mkdir(dstdir)
    assert (exists(dstdir) and exists(OUTPUT))

    # image path, bounding box and landmarks for every training record
    data = getDataFromTxt("./prepare_data/trainImageList.txt",
                          data_path=data_dir + '/Align')

    image_id = 0
    idx = 0
    # Defined up front so the final return works even when data is empty.
    F_imgs, F_landmarks = [], []

    # The context manager closes the label file even if a record raises.
    with open(join(OUTPUT, "landmark_%s_aug.txt" % (size)), 'w') as f:
        for (imgPath, bbox, landmarkGt) in data:
            F_imgs = []
            F_landmarks = []
            img = cv2.imread(imgPath)

            assert (img is not None)
            img_h, img_w = img.shape[:2]
            gt_box = np.array([bbox.left, bbox.top, bbox.right, bbox.bottom])
            # ground-truth face, resized to the network input size
            f_face = img[bbox.top:bbox.bottom + 1, bbox.left:bbox.right + 1]
            f_face = cv2.resize(f_face, (size, size))

            # landmarks normalised by the width / height of the gt box
            landmark = np.zeros((5, 2))
            for index, one in enumerate(landmarkGt):
                landmark[index] = (
                    (one[0] - gt_box[0]) / (gt_box[2] - gt_box[0]),
                    (one[1] - gt_box[1]) / (gt_box[3] - gt_box[1]))

            F_imgs.append(f_face)
            F_landmarks.append(landmark.reshape(10))

            if argument:
                idx = idx + 1
                if idx % 100 == 0:
                    print(idx, "images done")
                x1, y1, x2, y2 = gt_box
                gt_w = x2 - x1 + 1
                gt_h = y2 - y1 + 1
                # Small or partly out-of-image faces are dropped entirely
                # (the un-augmented face is not written either).
                if max(gt_w, gt_h) < 40 or x1 < 0 or y1 < 0:
                    continue

                # random shift
                for i in range(10):
                    # explicit ints: randint is given integer bounds instead
                    # of relying on implicit float truncation
                    bbox_size = npr.randint(
                        int(min(gt_w, gt_h) * 0.8),
                        int(np.ceil(1.25 * max(gt_w, gt_h))))
                    delta_x = npr.randint(int(-gt_w * 0.2), int(gt_w * 0.2))
                    delta_y = npr.randint(int(-gt_h * 0.2), int(gt_h * 0.2))
                    nx1 = int(max(x1 + gt_w / 2 - bbox_size / 2 + delta_x, 0))
                    ny1 = int(max(y1 + gt_h / 2 - bbox_size / 2 + delta_y, 0))

                    nx2 = nx1 + bbox_size
                    ny2 = ny1 + bbox_size
                    if nx2 > img_w or ny2 > img_h:
                        continue
                    crop_box = np.array([nx1, ny1, nx2, ny2])

                    cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
                    resized_im = cv2.resize(cropped_im, (size, size))
                    iou = IoU(crop_box, np.expand_dims(gt_box, 0))
                    if not iou > 0.65:
                        continue

                    F_imgs.append(resized_im)
                    # landmarks normalised by the shifted crop
                    landmark = np.zeros((5, 2))
                    for index, one in enumerate(landmarkGt):
                        landmark[index] = ((one[0] - nx1) / bbox_size,
                                           (one[1] - ny1) / bbox_size)
                    F_landmarks.append(landmark.reshape(10))
                    landmark_ = F_landmarks[-1].reshape(-1, 2)
                    crop_bbox = BBox([nx1, ny1, nx2, ny2])

                    # mirror (coin flip)
                    if random.choice([0, 1]) > 0:
                        face_flipped, landmark_flipped = flip(
                            resized_im, landmark_)
                        face_flipped = cv2.resize(face_flipped, (size, size))
                        F_imgs.append(face_flipped)
                        F_landmarks.append(landmark_flipped.reshape(10))

                    # rotate by +5 then -5, each on its own coin flip, and
                    # also keep the mirrored version of every rotated face
                    for alpha in (5, -5):
                        if random.choice([0, 1]) > 0:
                            face_rotated_by_alpha, landmark_rotated = rotate(
                                img, crop_bbox,
                                crop_bbox.reprojectLandmark(landmark_), alpha)
                            # back to crop-relative landmark coordinates
                            landmark_rotated = crop_bbox.projectLandmark(
                                landmark_rotated)
                            face_rotated_by_alpha = cv2.resize(
                                face_rotated_by_alpha, (size, size))
                            F_imgs.append(face_rotated_by_alpha)
                            F_landmarks.append(landmark_rotated.reshape(10))

                            face_flipped, landmark_flipped = flip(
                                face_rotated_by_alpha, landmark_rotated)
                            face_flipped = cv2.resize(face_flipped,
                                                      (size, size))
                            F_imgs.append(face_flipped)
                            F_landmarks.append(landmark_flipped.reshape(10))

            # Write-out runs for every record that was not skipped above.
            # It used to be nested under `if argument:`, so the default
            # argument=False collected the face but never saved anything.
            F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)

            for face, points in zip(F_imgs, F_landmarks):
                # keep only samples whose landmarks all lie strictly in (0, 1)
                if np.any(points <= 0) or np.any(points >= 1):
                    continue

                cv2.imwrite(join(dstdir, "%d.jpg" % (image_id)), face)
                f.write(dstdir + "/%d.jpg" % (image_id) + " -2 " +
                        " ".join(map(str, points)) + "\n")
                image_id = image_id + 1

    return F_imgs, F_landmarks