Exemple #1
0
def face_body_linedataset_for_image(shuffle=True):
    '''
    Build the train/val line-dataset files for the face_body dataset.

    The ``*.jpg`` files under the hard-coded image directory are listed,
    optionally shuffled with a fixed seed, and split 95% / 5% into a train and
    a val subset. Each subset is converted with ``convert_to_linedataset`` and
    ``linedataset_test`` is finally run on the train file.

    :param shuffle: shuffle the image list (seed 100) before splitting
    :return: None
    '''
    image_dir = "/media/dm/dm2/project/dataset/face_body/face_body_dataset/images"
    annotations_dir = '/media/dm/dm2/project/dataset/face_body/face_body_dataset/Annotations'
    train_filename = "/media/dm/dm2/project/dataset/face_body/face_body_dataset/train.txt"
    val_filename = "/media/dm/dm2/project/dataset/face_body/face_body_dataset/val.txt"

    image_list = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
    print("have {} images".format(len(image_list)))
    if shuffle:
        # A fixed seed makes the shuffle, and therefore the split, repeatable.
        random.seed(100)
        random.shuffle(image_list)

    # The first 95% of the images form the train subset, the rest the val subset.
    split_at = int(0.95 * len(image_list))
    jobs = (
        ("doing train data .....", image_list[:split_at], train_filename),
        ("doing val data .....", image_list[split_at:], val_filename),
    )

    # Convert the label data of each subset.
    for message, subset, out_filename in jobs:
        print(message)
        convert_to_linedataset(annotations_dir, subset, classes, out_filename)

    print("done...ok!")
    linedataset_test(train_filename, image_dir=image_dir, show=True)
def linedataset_for_image(shuffle=True):
    '''
    Build the train/val line-dataset files for the widerface (VOC layout) dataset.

    The ``*.jpg`` files under the hard-coded ``JPEGImages`` directory are
    listed, optionally shuffled with a fixed seed, and split 95% / 5% into a
    train and a val subset. Each subset is converted with
    ``convert_voc_to_linedataset`` using the "SSD" coordinate type, then
    ``linedataset_test`` is run on the train file.

    label data format:
    SSD  = [label_id,x,y,w,h]
    YOLO = [label_id,x_center/img_width ,y_center/img_height ,width/img_width ,height/img_height]
    MMDET= [img_width,img_height,label_id,x,y,w,h]
    :param shuffle: shuffle the image list (seed 100) before splitting
    :return: None
    '''
    coordinatesType = "SSD"  # coordinate type: SSD, YOLO or MMDET format
    show = False

    # Alternative dataset locations, kept for reference.
    # PCwall
    # train_filename = "/media/dm/dm2/project/dataset/VOC_wall/train.txt"
    # val_filename = "/media/dm/dm2/project/dataset/VOC_wall/val.txt"
    # annotations_dir = '/media/dm/dm2/project/dataset/VOC_wall/Annotations'
    # image_dir = "/media/dm/dm2/project/dataset/VOC_wall/JPEGImages"

    # VOC
    # DATA_ROOT="/media/dm/dm2/project/dataset/VOCdevkit/VOC2007/"
    # annotations_dir=DATA_ROOT+'Annotations'
    # image_dir=DATA_ROOT+"JPEGImages"
    # train_filename = DATA_ROOT+"train.txt"
    # val_filename = DATA_ROOT+"val.txt"

    # widerface
    DATA_ROOT = "/media/dm/dm2/project/dataset/face/wider_face_voc/"
    annotations_dir = DATA_ROOT + 'Annotations'
    image_dir = DATA_ROOT + "JPEGImages"
    train_filename = DATA_ROOT + "train.txt"
    val_filename = DATA_ROOT + "val.txt"

    image_list = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
    print("have {} images".format(len(image_list)))
    if shuffle:
        # A fixed seed makes the shuffle, and therefore the split, repeatable.
        random.seed(100)
        random.shuffle(image_list)

    # The first 95% of the images form the train subset, the rest the val subset.
    split_at = int(0.95 * len(image_list))
    jobs = (
        ("doing train data .....", image_list[:split_at], train_filename),
        ("doing val data .....", image_list[split_at:], val_filename),
    )

    # Convert the label data of each subset.
    for message, subset, out_filename in jobs:
        print(message)
        convert_voc_to_linedataset(annotations_dir, subset, classes,
                                   out_filename, coordinatesType, show)
    print("done...ok!")

    # Visual check of the generated train file.
    linedataset_test(train_filename, classes, image_dir=image_dir, show=True)
Exemple #3
0
def face_body_test(annotations_dir, image_dir, class_names, show=True):
    '''
    Walk the ``*.json`` annotation files and optionally display their boxes.

    For every annotation file the image with the same base name and a ``.jpg``
    extension is looked up in ``image_dir``. Files with a missing image, a
    missing annotation file or an empty result from
    ``face_body.get_annotation`` are reported and skipped.

    :param annotations_dir: directory searched for ``*.json`` annotation files
    :param image_dir: directory holding the matching ``.jpg`` images
    :param class_names: passed through to ``face_body.get_annotation``
    :param show: when true, read the image and display it with its boxes
    :return: None
    '''
    suffix = ".json"
    json_files = file_processing.get_files_list(annotations_dir,
                                                postfix=["*.json"])
    print("have {} annotations files".format(len(json_files)))
    for json_file in json_files:
        # The image shares the annotation's base name, with a .jpg extension.
        stem = os.path.basename(json_file)[:-len(suffix)]
        image_path = os.path.join(image_dir, stem + ".jpg")
        if not os.path.exists(image_path):
            print("no image_dict:{}".format(image_path))
            continue
        if not os.path.exists(json_file):
            print("no annotations:{}".format(json_file))
            continue
        boxes = face_body.get_annotation(json_file, class_names)
        if not boxes:
            print("no class in annotations:{}".format(json_file))
            continue
        if not show:
            continue
        image = image_processing.read_image(image_path)
        image_processing.show_boxList("image_dict", boxes, image)
def text_dataset_for_annotation(annotations_dir,
                                image_dir,
                                label_out_dir,
                                out_train_val_path,
                                class_names,
                                coordinatesType,
                                shuffle=True,
                                labelType="class_id",
                                show=True):
    '''
    Convert VOC ``*.xml`` annotations to a text dataset, split by annotation file.

    The annotation files are listed, optionally shuffled with a fixed seed and
    split 80% / 20% into train and val. Each subset is converted with
    ``convert_voc_to_textdataset_for_annotation`` and the returned image ids
    are saved to ``train.txt`` / ``val.txt`` under ``out_train_val_path``.

    :param annotations_dir: directory searched for ``*.xml`` annotation files
    :param image_dir: image directory passed to the converter
    :param label_out_dir: label output directory passed to the converter
    :param out_train_val_path: directory receiving ``train.txt`` and ``val.txt``
    :param class_names: class names passed to the converter
    :param coordinatesType: coordinate type passed to the converter
    :param shuffle: shuffle the annotation list (seed 100) before splitting
    :param labelType: class_name,class_id
    :param show: passed through to the converter
    :return: None
    '''
    xml_files = file_processing.get_files_list(annotations_dir,
                                               postfix=["*.xml"])
    print("have {} annotations files".format(len(xml_files)))
    if shuffle:
        # A fixed seed makes the shuffle, and therefore the split, repeatable.
        random.seed(100)
        random.shuffle(xml_files)

    # The first 80% of the files form the train subset, the rest the val subset.
    split_at = int(0.8 * len(xml_files))
    jobs = (
        ("doing train data .....", xml_files[:split_at]),
        ("doing val data .....", xml_files[split_at:]),
    )

    # Convert the label data of each subset and collect the image ids.
    id_lists = []
    for message, subset in jobs:
        print(message)
        id_lists.append(
            convert_voc_to_textdataset_for_annotation(subset,
                                                      image_dir,
                                                      label_out_dir,
                                                      class_names,
                                                      coordinatesType,
                                                      image_type=".jpg",
                                                      labelType=labelType,
                                                      show=show))
    print("done...ok!")
    train_image_id, val_image_id = id_lists

    # Save the image ids of both subsets.
    comment.save_id(os.path.join(out_train_val_path, "train.txt"),
                    train_image_id,
                    os.path.join(out_train_val_path, "val.txt"),
                    val_image_id)
Exemple #5
0
def convert_voc_label_for_image(annotations_dir,
                                image_dir,
                                label_out_dir,
                                out_train_val_path,
                                class_names,
                                show=True):
    '''
    Convert the labels of the ``*.bmp`` images and save the train/val id lists.

    The images are taken in the order returned by
    ``file_processing.get_files_list`` (no shuffling) and split 80% / 20% into
    train and val. Each subset is converted with ``convert_annotation_image``
    and the returned image ids are saved with ``save_id``.

    :param annotations_dir: annotation directory passed to the converter
    :param image_dir: directory searched for ``*.bmp`` images
    :param label_out_dir: label output directory passed to the converter
    :param out_train_val_path: directory receiving ``train.txt`` and ``val.txt``
    :param class_names: class names passed to the converter
    :param show: passed through to the converter
    :return: None
    '''
    bmp_files = file_processing.get_files_list(image_dir, postfix=["*.bmp"])
    print("have {} images".format(len(bmp_files)))

    # The first 80% of the images form the train subset, the rest the val subset.
    split_at = int(0.8 * len(bmp_files))
    jobs = (
        ("doing train data .....", bmp_files[:split_at]),
        ("doing val data .....", bmp_files[split_at:]),
    )

    # Convert the label data of each subset and collect the image ids.
    id_lists = []
    for message, subset in jobs:
        print(message)
        id_lists.append(
            convert_annotation_image(subset,
                                     annotations_dir,
                                     label_out_dir,
                                     class_names,
                                     show=show))
    print("done...ok!")
    train_image_id, val_image_id = id_lists

    # Save the image ids of both subsets.
    save_id(os.path.join(out_train_val_path, "train.txt"), train_image_id,
            os.path.join(out_train_val_path, "val.txt"), val_image_id)
Exemple #6
0
def pascal_voc_test(annotations_dir,
                    image_dir,
                    class_names,
                    coordinatesType="SSD",
                    show=True):
    '''
    Walk the VOC ``*.xml`` annotation files and optionally display them.

    For every annotation file the image with the same base name and a ``.jpg``
    extension is looked up in ``image_dir``. Files with a missing image, a
    missing annotation file, or no rects / class names / class ids returned by
    ``pascal_voc.get_annotation`` are reported and skipped.

    :param annotations_dir: directory searched for ``*.xml`` annotation files
    :param image_dir: directory holding the matching ``.jpg`` images
    :param class_names: passed through to ``pascal_voc.get_annotation``
    :param coordinatesType: passed through to ``pascal_voc.get_annotation``
    :param show: when true, read the image and display its rects and class names
    :return: None
    '''
    annotations_list = file_processing.get_files_list(annotations_dir,
                                                      postfix=["*.xml"])
    print("have {} annotations files".format(len(annotations_list)))
    for annotations_file in annotations_list:
        # The image shares the annotation's base name, with a .jpg extension.
        name_id = os.path.basename(annotations_file)[:-len(".xml")]
        image_path = os.path.join(image_dir, name_id + ".jpg")
        if not os.path.exists(image_path):
            print("no image_dict:{}".format(image_path))
            continue
        if not os.path.exists(annotations_file):
            print("no annotations:{}".format(annotations_file))
            continue
        rects, class_name, class_id = pascal_voc.get_annotation(
            annotations_file, class_names, coordinatesType)
        # Explicit len() checks are kept instead of truthiness tests because
        # the returned containers may be arrays -- TODO confirm their types.
        if len(rects) == 0 or len(class_name) == 0 or len(class_id) == 0:
            print("no class in annotations:{}".format(annotations_file))
            # Nothing to draw: skip the file, as face_body_test does, instead
            # of displaying an image without any annotation.
            continue
        if show:
            image = image_processing.read_image(image_path)
            image_processing.show_image_rects_text("image_dict", image, rects,
                                                   class_name)
    def convert_images2video(self, image_dir, save_video, freq=1, fps=30):
        """
        Encode the images found under ``image_dir`` into one video file.

        Among the readable images, every ``freq``-th one is passed through
        ``self.do_something`` and the result is written to the video.
        Unreadable images are skipped and not counted. The video width and
        height are taken from the first readable image. If no image can be
        read, a message is printed and no video writer is created.

        :param image_dir: directory searched for ``*.jpg`` / ``*.png`` images
        :param save_video: output path handed to ``image_processing.get_video_writer``
        :param freq: keep one frame out of every ``freq`` readable images
        :param fps: frame rate handed to ``image_processing.get_video_writer``
        :return: None
        """
        image_list = file_processing.get_files_list(image_dir,
                                                    postfix=["*.jpg", "*.png"])
        # Take the frame size from the first image that can be decoded, so an
        # empty directory or a corrupt first file does not crash the conversion.
        first_frame = None
        for image_path in image_list:
            first_frame = cv2.imread(image_path)
            if first_frame is not None:
                break
        if first_frame is None:
            print("no readable images in:{}".format(image_dir))
            return
        h, w = first_frame.shape[:2]
        video_writer = image_processing.get_video_writer(save_video,
                                                         width=w,
                                                         height=h,
                                                         fps=fps)
        count = 0
        try:
            for image_path in tqdm(image_list):
                frame = cv2.imread(image_path)
                if frame is None:
                    continue
                if count % freq == 0:
                    out_frame = self.do_something(frame)
                    video_writer.write(out_frame)

                count += 1
        finally:
            # Release the writer even when processing a frame raises.
            video_writer.release()
def text_dataset_for_image(annotations_dir,
                           image_dir,
                           label_out_dir,
                           out_train_val_path,
                           class_names,
                           coordinatesType,
                           shuffle=True,
                           labelType="class_id",
                           show=True):
    '''
    Convert VOC annotations to a text dataset, split by image file.

    The ``*.jpg`` images are listed, optionally shuffled with a fixed seed and
    split 90% / 10% into train and val. Each subset is converted with
    ``convert_voc_to_textdataset_for_image`` and the returned image ids are
    saved to ``train.txt`` / ``val.txt`` under ``out_train_val_path``.

    label data format:
    SSD  = [label_id,x,y,w,h]
    YOLO = [label_id,x_center/img_width ,y_center/img_height ,width/img_width ,height/img_height]
    MMDET= [img_width,img_height,label_id,x,y,w,h]
    :param annotations_dir: annotation directory passed to the converter
    :param image_dir: directory searched for ``*.jpg`` images
    :param label_out_dir: label output directory passed to the converter
    :param out_train_val_path: directory receiving ``train.txt`` and ``val.txt``
    :param class_names: class names passed to the converter
    :param coordinatesType: coordinate type: SSD, YOLO or MMDET format
    :param shuffle: shuffle the image list (seed 100) before splitting
    :param labelType: passed through to the converter
    :param show: passed through to the converter
    :return: None
    '''
    jpg_files = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
    print("have {} images".format(len(jpg_files)))
    if shuffle:
        # A fixed seed makes the shuffle, and therefore the split, repeatable.
        random.seed(100)
        random.shuffle(jpg_files)

    # The first 90% of the images form the train subset, the rest the val subset.
    split_at = int(0.90 * len(jpg_files))
    jobs = (
        ("doing train data .....", jpg_files[:split_at]),
        ("doing val data .....", jpg_files[split_at:]),
    )

    # Convert the label data of each subset and collect the image ids.
    id_lists = []
    for message, subset in jobs:
        print(message)
        id_lists.append(
            convert_voc_to_textdataset_for_image(subset,
                                                 annotations_dir,
                                                 label_out_dir,
                                                 class_names,
                                                 coordinatesType,
                                                 labelType=labelType,
                                                 show=show))
    print("done...ok!")
    train_image_id, val_image_id = id_lists

    # Save the image ids of both subsets.
    comment.save_id(os.path.join(out_train_val_path, "train.txt"),
                    train_image_id,
                    os.path.join(out_train_val_path, "val.txt"),
                    val_image_id)
def get_combinations_pair_data(image_dir, pair_num=0):
    '''
    Build a list of image pairs labelled as same / different identity.

    Every 2-combination of the ``*.jpg`` files under ``image_dir`` becomes a
    row ``[image_id1, image_id2, issame]``, where an image id is
    ``<parent directory name>/<file name>`` and ``issame`` is 1 when both
    images share the same parent directory name, otherwise 0. Up to the same
    number of different and same pairs are then drawn at random.

    :param image_dir: directory searched for ``*.jpg`` images
    :param pair_num: total number of pairs wanted, half of them same-label and
        half different-label; 0 or None selects every same-label pair
    :return: string ndarray with three columns (image_id1, image_id2, issame),
        the different-label rows first; ``issame`` is stored as "0" / "1"
    :raises Exception: when more same-label pairs are requested than exist
    '''
    # Half of the requested pairs are same-label, half different-label.
    # None is treated like 0 ("take everything") instead of failing on None / 2.
    select_nums = int(pair_num / 2) if pair_num else 0
    image_list = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
    nums = len(image_list)
    print("have {} images and {} combinations".format(nums,
                                                      nums * (nums - 1) / 2))
    pairs = []
    for image_path1, image_path2 in itertools.combinations(image_list, 2):
        # The parent directory name is the identity label of an image.
        label1 = image_path1.split(os.sep)[-2]
        label2 = image_path2.split(os.sep)[-2]
        issame = 1 if label1 == label2 else 0
        image_id1 = os.path.join(label1, os.path.basename(image_path1))
        image_id2 = os.path.join(label2, os.path.basename(image_path2))
        pairs.append([image_id1, image_id2, issame])

    if pairs:
        # np.asarray turns the mixed str/int rows into a string array, which
        # is why the flag is compared against "0" / "1" below.
        pair_issame = np.asarray(pairs)
        # lexsort uses the last key as the primary one: rows are ordered by
        # issame, then by the second image id, then by the first.
        pair_issame = pair_issame[np.lexsort(pair_issame.T)]
    else:
        # Fewer than two images: keep a well-formed, empty 3-column array so
        # the column indexing below does not fail.
        pair_issame = np.empty((0, 3), dtype=str)

    pair_issame_0 = pair_issame[pair_issame[:, -1] == "0", :]
    pair_issame_1 = pair_issame[pair_issame[:, -1] == "1", :]
    num_pair_issame_1 = len(pair_issame_1)
    num_pair_issame_0 = len(pair_issame_0)
    if select_nums == 0:
        select_nums = num_pair_issame_1
    elif select_nums > num_pair_issame_1:
        raise Exception(
            "pair_nums({}) must be less than num_pair_issame_1({})".format(
                select_nums, num_pair_issame_1))

    # Randomly pick at most select_nums rows of each kind.
    index_0 = np.random.permutation(num_pair_issame_0)[:select_nums]
    index_1 = np.random.permutation(num_pair_issame_1)[:select_nums]
    pair_issame_0 = pair_issame_0[index_0, :]
    pair_issame_1 = pair_issame_1[index_1, :]
    pair_issame = np.concatenate([pair_issame_0, pair_issame_1], axis=0)
    print("pair_issame_0 nums:{}".format(len(pair_issame_0)))
    print("pair_issame_1 nums:{}".format(len(pair_issame_1)))

    print("have {} pairs".format(len(pair_issame)))
    return pair_issame
def convert_images_format(image_dir, out_dir):
    '''
    Re-save every ``*.bmp`` image found under ``image_dir`` as a ``.jpg`` file.

    Each image is read with ``image_processing.read_image`` and saved with
    ``image_processing.save_image`` to the path returned by
    ``file_processing.create_dir(out_dir, dir1=None, filename=<name>.jpg)``.

    :param image_dir: directory searched for ``*.bmp`` images
    :param out_dir: output directory passed to ``file_processing.create_dir``
    :return: None
    '''
    image_list = file_processing.get_files_list(image_dir, postfix=["*.bmp"])
    for image_path in image_list:
        image = image_processing.read_image(image_path)
        # Swap only the extension: str.replace("bmp", "jpg") would also
        # rewrite a "bmp" that appears inside the file name itself.
        stem, _ = os.path.splitext(os.path.basename(image_path))
        basename = stem + ".jpg"
        dest_path = file_processing.create_dir(out_dir,
                                               dir1=None,
                                               filename=basename)
        image_processing.save_image(dest_path, image, toUINT8=False)
Exemple #11
0
def ramdom_select_image_dir(image_dir, dest_dir):
    '''
    Copy a random selection of at most 100 images per sub-directory.

    For every entry returned by ``file_processing.get_sub_directory_list`` the
    images of that sub-directory are randomly permuted, and the first 100 are
    copied to the path returned by
    ``file_processing.create_dir(dest_dir, <sub-directory>, <file name>)``.

    :param image_dir: root directory whose sub-directories hold the images
    :param dest_dir: destination root passed to ``file_processing.create_dir``
    :return: None
    '''
    select_nums = 100
    for sub_dir in file_processing.get_sub_directory_list(image_dir):
        candidates = file_processing.get_files_list(
            os.path.join(image_dir, sub_dir),
            postfix=['*.jpg', "*.jpeg", '*.png', "*.JPG"])
        chosen = np.random.permutation(candidates)[:select_nums]
        for src_path in chosen:
            dest_path = file_processing.create_dir(dest_dir, sub_dir,
                                                   os.path.basename(src_path))
            shutil.copy(src_path, dest_path)
Exemple #12
0
def select_image_dir(image_dir, dest_dir):
    '''
    Move the images whose index field is "0" into ``dest_dir``.

    The index is the second "_"-separated field of the file name taken up to
    its first ".". Matching files are moved with ``file_processing.move_file``
    to the path returned by
    ``file_processing.create_dir(dest_dir, <sub-directory>, <file name>)``.
    File names without a "_" carry no index and are left in place.

    :param image_dir: root directory whose sub-directories hold the images
    :param dest_dir: destination root passed to ``file_processing.create_dir``
    :return: None
    '''
    image_id = file_processing.get_sub_directory_list(image_dir)
    for sub_dir in image_id:
        image_list = file_processing.get_files_list(
            os.path.join(image_dir, sub_dir),
            postfix=['*.jpg', "*.jpeg", '*.png', "*.bmp"])
        for src_path in image_list:
            basename = os.path.basename(src_path)
            name_fields = basename.split(".")[0].split("_")
            # Skip names without an index field rather than raising an
            # IndexError that would abort a half-finished move.
            if len(name_fields) < 2 or name_fields[1] != "0":
                continue
            dest_path = file_processing.create_dir(dest_dir, sub_dir, basename)
            file_processing.move_file(src_path, dest_path)
Exemple #13
0
def image_to_facebank(image_dir, dest_dir):
    '''
    Copy every image into a sub-directory named after its pinyin id.

    The part of the file name before the first "." is converted with
    ``xpinyin.Pinyin.get_pinyin`` (empty separator). The image is then copied
    with ``file_processing.copy_file`` to the path returned by
    ``file_processing.create_dir(dest_dir, <pinyin id>, <file name>)``.

    :param image_dir: directory searched for jpg / jpeg / png images
    :param dest_dir: destination root passed to ``file_processing.create_dir``
    :return: None
    '''
    from xpinyin import Pinyin
    converter = Pinyin()
    sources = file_processing.get_files_list(
        image_dir, postfix=['*.jpg', "*.jpeg", '*.png', "*.JPG"])
    print("have ID:{}".format(len(sources)))
    for src_path in sources:
        file_name = os.path.basename(src_path)
        pinyin_id = converter.get_pinyin(file_name.split(".")[0], '')
        dest_path = file_processing.create_dir(dest_dir, pinyin_id, file_name)
        file_processing.copy_file(src_path, dest_path)
def convert_image_format(image_dir,
                         dest_dir,
                         resize_width=None,
                         dest_format='.jpg'):
    '''
    Re-save the images of every sub-directory with the extension ``dest_format``.

    Each image is read with ``image_processing.read_image_gbk`` and saved with
    ``image_processing.save_image`` to the path returned by
    ``file_processing.create_dir(dest_dir, <sub-directory>, <stem> + dest_format)``,
    where the stem is the file name up to its first ".".

    :param image_dir: root directory whose sub-directories hold the images
    :param dest_dir: destination root passed to ``file_processing.create_dir``
    :param resize_width: passed through to ``image_processing.read_image_gbk``
    :param dest_format: extension (with the leading dot) of the written files
    :return: None
    '''
    for sub_dir in file_processing.get_sub_directory_list(image_dir):
        sources = file_processing.get_files_list(
            os.path.join(image_dir, sub_dir),
            postfix=['*.jpg', "*.jpeg", '*.png', "*.JPG"])
        print("processing :{}".format(sub_dir))
        for src_path in sources:
            stem = os.path.basename(src_path).split('.')[0]
            image = image_processing.read_image_gbk(src_path,
                                                    resize_width=resize_width)
            target_name = stem + dest_format
            dest_path = file_processing.create_dir(dest_dir, sub_dir,
                                                   target_name)
            file_processing.create_file_path(dest_path)
            image_processing.save_image(dest_path, image)
Exemple #15
0
def select_facebank_detect(image_dir, dest_dir, id_nums=None, detect_face=True):
    '''
    Copy one image per id into ``dest_dir``, optionally requiring a single face.

    The ids are the entries returned by
    ``file_processing.get_sub_directory_list``; when ``id_nums`` is set only
    the first ``id_nums`` of them are used. With ``detect_face`` enabled an
    ``UltraLightFaceDetector`` is built from hard-coded settings, and only
    images for which it returns exactly one box are eligible.

    :param image_dir: root directory whose sub-directories hold the images
    :param dest_dir: destination root passed to ``file_processing.create_dir``
    :param id_nums: maximum number of ids to process; falsy means all
    :param detect_face: run the face detector and keep single-face images only
    :return: None
    '''
    if detect_face:
        detector_config = {
            "model_path": "/home/panjinquan/project/python-learning-notes//libs/ultra_ligh_face/face_detection_rbf.pth",
            "network": "RFB",
            "confidence_threshold": 0.85,
            "nms_threshold": 0.3,
            "top_k": 500,
            "keep_top_k": 750,
            "device": "cuda:0",
        }
        detector = UltraLightFaceDetector(**detector_config)

    per_nums = 1  # images copied per id
    id_dirs = file_processing.get_sub_directory_list(image_dir)
    total_ids = len(id_dirs)
    print("have ID:{}".format(total_ids))
    if id_nums:
        id_dirs = id_dirs[:min(id_nums, total_ids)]
    print("select ID:{}".format(len(id_dirs)))

    for sub_dir in id_dirs:
        candidates = file_processing.get_files_list(
            os.path.join(image_dir, sub_dir),
            postfix=['*.jpg', "*.jpeg", '*.png', "*.JPG"])
        copied = 0
        for src_path in candidates:
            if detect_face:
                bgr_image = cv2.imread(src_path)
                bboxes, _scores, _landms = detector.detect(bgr_image,
                                                           isshow=True)
                if len(bboxes) != 1:
                    print("no face:{}".format(src_path))
                    continue
            if copied >= per_nums:
                break
            copied += 1
            dest_path = file_processing.create_dir(dest_dir, sub_dir,
                                                   os.path.basename(src_path))
            file_processing.copy_file(src_path, dest_path)
Exemple #16
0
def select_facebank(image_dir, dest_dir, id_nums=10):
    '''
    Copy the first image of each id into ``dest_dir``.

    The ids are the entries returned by
    ``file_processing.get_sub_directory_list``; when ``id_nums`` is set only
    the first ``id_nums`` of them are used.

    :param image_dir: root directory whose sub-directories hold the images
    :param dest_dir: destination root passed to ``file_processing.create_dir``
    :param id_nums: maximum number of ids to process; falsy means all
    :return: None
    '''
    per_nums = 1  # images copied per id
    id_dirs = file_processing.get_sub_directory_list(image_dir)
    total_ids = len(id_dirs)
    print("have ID:{}".format(total_ids))
    if id_nums:
        id_dirs = id_dirs[:min(id_nums, total_ids)]
    print("select ID:{}".format(len(id_dirs)))
    for sub_dir in id_dirs:
        candidates = file_processing.get_files_list(
            os.path.join(image_dir, sub_dir),
            postfix=['*.jpg', "*.jpeg", '*.png', "*.JPG"])
        for copied, src_path in enumerate(candidates):
            if copied >= per_nums:
                break
            dest_path = file_processing.create_dir(dest_dir, sub_dir,
                                                   os.path.basename(src_path))
            file_processing.copy_file(src_path, dest_path)
Exemple #17
0
def face_body_for_image(shuffle=False):
    '''
    Convert the face_body annotations to SSD text labels and save the id lists.

    The ``*.jpg`` files under the hard-coded ``trainval`` directory are listed,
    optionally shuffled with a fixed seed and split 95% / 5% into train and
    val. Each subset is converted with ``convert_facebody_to_textdataset``, the
    returned image ids are saved to ``train.txt`` / ``val.txt`` and
    ``batch_label_test`` is run on the generated labels.

    :param shuffle: shuffle the image list (seed 100) before splitting
    :return: None
    '''
    # Output directory for the train/val files.
    out_train_val_path = "/media/dm/dm2/project/dataset/face_body/SSD"
    annotations_dir = '/media/dm/dm2/project/dataset/face_body/face_body_dataset/Annotations'
    label_out_dir = "/media/dm/dm2/project/dataset/face_body/SSD/label"
    image_dir = "/media/dm/dm2/project/dataset/face_body/SSD/trainval"

    jpg_files = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
    print("have {} images".format(len(jpg_files)))
    if shuffle:
        # A fixed seed makes the shuffle, and therefore the split, repeatable.
        random.seed(100)
        random.shuffle(jpg_files)

    # The first 95% of the images form the train subset, the rest the val subset.
    split_at = int(0.95 * len(jpg_files))
    subsets = (jpg_files[:split_at], jpg_files[split_at:])

    id_lists = []
    for subset in subsets:
        id_lists.append(
            convert_facebody_to_textdataset(subset,
                                            annotations_dir,
                                            label_out_dir,
                                            classes,
                                            coordinatesType="SSD",
                                            show=False))
    train_image_id, val_image_id = id_lists
    print("done...ok!")

    # Save the image ids of both subsets.
    comment.save_id(os.path.join(out_train_val_path, "train.txt"),
                    train_image_id,
                    os.path.join(out_train_val_path, "val.txt"),
                    val_image_id)
    batch_label_test(label_out_dir, image_dir, classes)
Exemple #18
0
def rename_image_dir(dataset_dir, prefix="ID", add_sub=False):
    '''
    Rename every jpg / png image to ``<prefix>[_<sub>]_<index>.<ext>``.

    ``<sub>`` is the name of the image's parent directory (only used when
    ``add_sub`` is true), ``<index>`` is a 5-digit zero-padded counter and
    ``<ext>`` is the original extension. Each image stays in its directory and
    gets the lowest index whose name is not taken there. An image that already
    carries such a name is left untouched.

    :param dataset_dir: directory searched for ``*.jpg`` / ``*.png`` images
    :param prefix: first field of the new file name
    :param add_sub: insert the parent directory name after the prefix
    :return: None
    '''
    image_list = file_processing.get_files_list(dataset_dir,
                                                postfix=['*.jpg', "*.png"])
    for image_path in image_list:
        extension = os.path.basename(image_path).split(".")[-1]
        dirname = os.path.dirname(image_path)
        sub = image_path.split(os.sep)[-2]
        name_fields = [prefix, sub] if add_sub else [prefix]

        # Rebuild the whole name for each candidate index. The previous code
        # appended a list to the already-joined string, which raised a
        # TypeError as soon as the first candidate name was taken.
        index = 0
        while True:
            new_name = "_".join(
                name_fields + ['{:0=5}.{}'.format(index, extension)])
            newpath = os.path.join(dirname, new_name)
            if newpath == image_path or not os.path.exists(newpath):
                break
            index += 1

        if newpath == image_path:
            # The file already has its target name; nothing to rename.
            continue

        print(image_path)
        print(new_name)
        os.rename(image_path, newpath)
 def detect_image_dir(self, image_dir):
     """
     Run ``self.detect`` on every ``*.jpg`` image found under ``image_dir``.

     :param image_dir: directory searched for ``*.jpg`` images
     :return: None
     """
     jpg_paths = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
     for jpg_path in jpg_paths:
         frame = cv2.imread(jpg_path)
         self.detect(frame, ishow=True)
Exemple #20
0
def batch_label_test(label_dir, image_dir, classes):
    '''
    Run ``label_test`` on every label file found under ``label_dir``.

    :param label_dir: directory searched for label files (postfix ".txt")
    :param image_dir: image directory passed to ``label_test``
    :param classes: passed to ``label_test`` as ``class_names``
    :return: None
    '''
    label_files = file_processing.get_files_list(label_dir, postfix=[".txt"])
    for label_file in label_files:
        label_test(image_dir, label_file, class_names=classes)