def face_body_linedataset_for_image(shuffle=True):
    """Build train/val line-dataset files for the face_body dataset.

    Collects all *.jpg images under the hard-coded dataset directory,
    optionally shuffles them with a fixed seed, splits them 95%/5% into
    train/val, converts the annotations with ``convert_to_linedataset``
    and finally visual-checks the generated train file.

    :param shuffle: shuffle the image list (deterministically) before splitting
    :return: None
    """
    annotations_dir = '/media/dm/dm2/project/dataset/face_body/face_body_dataset/Annotations'
    image_dir = "/media/dm/dm2/project/dataset/face_body/face_body_dataset/images"
    train_filename = "/media/dm/dm2/project/dataset/face_body/face_body_dataset/train.txt"
    val_filename = "/media/dm/dm2/project/dataset/face_body/face_body_dataset/val.txt"

    image_list = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
    print("have {} images".format(len(image_list)))
    if shuffle:
        # fixed seed keeps the train/val split reproducible across runs
        random.seed(100)
        random.shuffle(image_list)

    # 95% train / 5% val split
    split = int(0.95 * len(image_list))
    train_images = image_list[:split]
    val_images = image_list[split:]

    print("doing train data .....")
    convert_to_linedataset(annotations_dir, train_images, classes, train_filename)
    print("doing val data .....")
    convert_to_linedataset(annotations_dir, val_images, classes, val_filename)
    print("done...ok!")
    linedataset_test(train_filename, image_dir=image_dir, show=True)
def linedataset_for_image(shuffle=True):
    """Convert the wider_face VOC-style dataset into line-dataset files.

    Label data format (selected by ``coordinatesType``):
      SSD   = [label_id, x, y, w, h]
      YOLO  = [label_id, x_center/img_width, y_center/img_height,
               width/img_width, height/img_height]
      MMDET = [img_width, img_height, label_id, x, y, w, h]

    :param shuffle: shuffle the image list (deterministically) before splitting
    :return: None
    """
    show = False
    coordinatesType = "SSD"

    # widerface dataset layout (hard-coded local paths)
    data_root = "/media/dm/dm2/project/dataset/face/wider_face_voc/"
    annotations_dir = data_root + 'Annotations'
    image_dir = data_root + "JPEGImages"
    train_filename = data_root + "train.txt"
    val_filename = data_root + "val.txt"

    image_list = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
    print("have {} images".format(len(image_list)))
    if shuffle:
        # fixed seed keeps the train/val split reproducible across runs
        random.seed(100)
        random.shuffle(image_list)

    # 95% train / 5% val split
    split = int(0.95 * len(image_list))
    train_image_list = image_list[:split]
    val_image_list = image_list[split:]

    print("doing train data .....")
    convert_voc_to_linedataset(annotations_dir, train_image_list, classes,
                               train_filename, coordinatesType, show)
    print("doing val data .....")
    convert_voc_to_linedataset(annotations_dir, val_image_list, classes,
                               val_filename, coordinatesType, show)
    print("done...ok!")
    # visual sanity-check of the generated train file
    linedataset_test(train_filename, classes, image_dir=image_dir, show=True)
def face_body_test(annotations_dir, image_dir, class_names, show=True):
    """Sanity-check json annotations against their images.

    For every *.json annotation file, verify the matching *.jpg exists,
    parse the boxes with ``face_body.get_annotation`` and optionally
    display them.

    :param annotations_dir: directory with *.json annotation files
    :param image_dir: directory with the matching *.jpg images
    :param class_names: class names passed to the annotation parser
    :param show: display every annotated image when True
    :return: None
    """
    annotations_list = file_processing.get_files_list(annotations_dir, postfix=["*.json"])
    print("have {} annotations files".format(len(annotations_list)))
    for annotations_file in annotations_list:
        name_id = os.path.basename(annotations_file)[:-len(".json")]
        image_path = os.path.join(image_dir, name_id + ".jpg")
        if not os.path.exists(image_path):
            print("no image_dict:{}".format(image_path))
            continue
        if not os.path.exists(annotations_file):
            print("no annotations:{}".format(annotations_file))
            continue
        boxList = face_body.get_annotation(annotations_file, class_names)
        if not boxList:
            print("no class in annotations:{}".format(annotations_file))
            continue
        if show:
            image = image_processing.read_image(image_path)
            image_processing.show_boxList("image_dict", boxList, image)
def text_dataset_for_annotation(annotations_dir, image_dir, label_out_dir, out_train_val_path, class_names, coordinatesType, shuffle=True, labelType="class_id", show=True):
    """Split VOC xml annotations into train/val and convert them to text labels.

    :param annotations_dir: directory with *.xml annotation files
    :param image_dir: directory with the matching images
    :param label_out_dir: output directory for the converted label files
    :param out_train_val_path: directory where train.txt / val.txt id lists go
    :param class_names: class names used during conversion
    :param coordinatesType: coordinate format (e.g. SSD, YOLO, MMDET)
    :param shuffle: shuffle annotations (deterministically) before splitting
    :param labelType: label representation, "class_name" or "class_id"
    :param show: display converted samples when True
    :return: None
    """
    xml_list = file_processing.get_files_list(annotations_dir, postfix=["*.xml"])
    print("have {} annotations files".format(len(xml_list)))
    if shuffle:
        # fixed seed keeps the train/val split reproducible across runs
        random.seed(100)
        random.shuffle(xml_list)

    # 80% train / 20% val split
    split = int(0.8 * len(xml_list))
    train_annotations = xml_list[:split]
    val_annotations = xml_list[split:]

    print("doing train data .....")
    train_image_id = convert_voc_to_textdataset_for_annotation(
        train_annotations, image_dir, label_out_dir, class_names,
        coordinatesType, image_type=".jpg", labelType=labelType, show=show)
    print("doing val data .....")
    val_image_id = convert_voc_to_textdataset_for_annotation(
        val_annotations, image_dir, label_out_dir, class_names,
        coordinatesType, image_type=".jpg", labelType=labelType, show=show)
    print("done...ok!")

    # persist the image-id lists
    train_id_path = os.path.join(out_train_val_path, "train.txt")
    val_id_path = os.path.join(out_train_val_path, "val.txt")
    comment.save_id(train_id_path, train_image_id, val_id_path, val_image_id)
def convert_voc_label_for_image(annotations_dir, image_dir, label_out_dir, out_train_val_path, class_names, show=True):
    """Split the *.bmp images into train/val and convert their annotations.

    :param annotations_dir: directory with the annotation files
    :param image_dir: directory with the *.bmp images
    :param label_out_dir: output directory for the converted labels
    :param out_train_val_path: directory where train.txt / val.txt id lists go
    :param class_names: class names used during conversion
    :param show: display converted samples when True
    :return: None
    """
    image_list = file_processing.get_files_list(image_dir, postfix=["*.bmp"])
    print("have {} images".format(len(image_list)))

    # 80% train / 20% val split (no shuffling here)
    split = int(0.8 * len(image_list))
    train_images = image_list[:split]
    val_images = image_list[split:]

    print("doing train data .....")
    train_image_id = convert_annotation_image(train_images, annotations_dir,
                                              label_out_dir, class_names, show=show)
    print("doing val data .....")
    val_image_id = convert_annotation_image(val_images, annotations_dir,
                                            label_out_dir, class_names, show=show)
    print("done...ok!")

    # persist the image-id lists
    train_id_path = os.path.join(out_train_val_path, "train.txt")
    val_id_path = os.path.join(out_train_val_path, "val.txt")
    save_id(train_id_path, train_image_id, val_id_path, val_image_id)
def pascal_voc_test(annotations_dir, image_dir, class_names, coordinatesType="SSD", show=True):
    """Sanity-check pascal-voc xml annotations against their images.

    For every *.xml annotation file, verify the matching *.jpg exists,
    parse the boxes with ``pascal_voc.get_annotation`` and optionally
    display them.

    :param annotations_dir: directory with *.xml annotation files
    :param image_dir: directory with the matching *.jpg images
    :param class_names: class names passed to the annotation parser
    :param coordinatesType: coordinate format (e.g. SSD, YOLO, MMDET)
    :param show: display every annotated image when True
    :return: None
    """
    annotations_list = file_processing.get_files_list(annotations_dir, postfix=["*.xml"])
    print("have {} annotations files".format(len(annotations_list)))
    for annotations_file in annotations_list:
        name_id = os.path.basename(annotations_file)[:-len(".xml")]
        image_name = name_id + ".jpg"
        image_path = os.path.join(image_dir, image_name)
        if not os.path.exists(image_path):
            print("no image_dict:{}".format(image_path))
            continue
        if not os.path.exists(annotations_file):
            print("no annotations:{}".format(annotations_file))
            continue
        rects, class_name, class_id = pascal_voc.get_annotation(
            annotations_file, class_names, coordinatesType)
        if len(rects) == 0 or len(class_name) == 0 or len(class_id) == 0:
            print("no class in annotations:{}".format(annotations_file))
            # BUGFIX: skip files with no annotations instead of displaying
            # an empty image (consistent with face_body_test)
            continue
        if show:
            image = image_processing.read_image(image_path)
            image_processing.show_image_rects_text("image_dict", image, rects, class_name)
def convert_images2video(self, image_dir, save_video, freq=1, fps=30):
    """Encode the images under ``image_dir`` into a video file.

    Every ``freq``-th readable frame is passed through ``self.do_something``
    and written to ``save_video``.

    :param image_dir: directory containing *.jpg / *.png frames
    :param save_video: output video path
    :param freq: write every freq-th frame (1 = every frame)
    :param fps: output video frame rate
    :raises ValueError: if the directory contains no readable image
    :return: None
    """
    image_list = file_processing.get_files_list(image_dir, postfix=["*.jpg", "*.png"])
    # BUGFIX: the original indexed image_list[0] (IndexError on an empty
    # directory) and took .shape of a possibly-None first frame; probe
    # for the first readable frame instead.
    h = w = None
    for image_path in image_list:
        frame = cv2.imread(image_path)
        if frame is not None:
            h, w = frame.shape[:2]
            break
    if w is None:
        raise ValueError("no readable image found in: {}".format(image_dir))
    video_writer = image_processing.get_video_writer(save_video, width=w, height=h, fps=fps)
    count = 0
    for image_path in tqdm(image_list):
        frame = cv2.imread(image_path)
        if frame is None:
            continue  # skip unreadable files
        if count % freq == 0:
            out_frame = self.do_something(frame)
            video_writer.write(out_frame)
        count += 1
    video_writer.release()
def text_dataset_for_image(annotations_dir, image_dir, label_out_dir, out_train_val_path, class_names, coordinatesType, shuffle=True, labelType="class_id", show=True):
    """Split the *.jpg images into train/val and convert their VOC labels to text.

    Label data format (selected by ``coordinatesType``):
      SSD   = [label_id, x, y, w, h]
      YOLO  = [label_id, x_center/img_width, y_center/img_height,
               width/img_width, height/img_height]
      MMDET = [img_width, img_height, label_id, x, y, w, h]

    :param annotations_dir: directory with the annotation files
    :param image_dir: directory with the *.jpg images
    :param label_out_dir: output directory for the converted label files
    :param out_train_val_path: directory where train.txt / val.txt id lists go
    :param class_names: class names used during conversion
    :param coordinatesType: coordinate format: SSD, YOLO or MMDET
    :param shuffle: shuffle images (deterministically) before splitting
    :param labelType: label representation, "class_name" or "class_id"
    :param show: display converted samples when True
    :return: None
    """
    image_list = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
    print("have {} images".format(len(image_list)))
    if shuffle:
        # fixed seed keeps the train/val split reproducible across runs
        random.seed(100)
        random.shuffle(image_list)

    # 90% train / 10% val split
    split = int(0.90 * len(image_list))
    train_images = image_list[:split]
    val_images = image_list[split:]

    print("doing train data .....")
    train_image_id = convert_voc_to_textdataset_for_image(train_images, annotations_dir,
                                                          label_out_dir, class_names,
                                                          coordinatesType,
                                                          labelType=labelType, show=show)
    print("doing val data .....")
    val_image_id = convert_voc_to_textdataset_for_image(val_images, annotations_dir,
                                                        label_out_dir, class_names,
                                                        coordinatesType,
                                                        labelType=labelType, show=show)
    print("done...ok!")

    # persist the image-id lists
    train_id_path = os.path.join(out_train_val_path, "train.txt")
    val_id_path = os.path.join(out_train_val_path, "val.txt")
    comment.save_id(train_id_path, train_image_id, val_id_path, val_image_id)
def get_combinations_pair_data(image_dir, pair_num=0):
    """Build (image1, image2, issame) pairs from all 2-combinations of images.

    The identity label of an image is its parent directory name; a pair is
    "same" (1) when both images share that parent. The result is balanced:
    ``pair_num // 2`` same pairs and ``pair_num // 2`` different pairs,
    randomly selected.

    :param image_dir: root directory, one sub-directory per identity
    :param pair_num: total number of pairs wanted (half same / half different);
        0 or None selects as many pairs per class as there are same-pairs
    :return: numpy array of shape (N, 3) with [id1, id2, issame] strings
    :raises Exception: if pair_num/2 exceeds the number of available same-pairs
    """
    # BUGFIX: the original did int(pair_num / 2) first, so the later
    # "select_nums is None" check was dead code (pair_num=None raised a
    # TypeError before reaching it). Accept None explicitly.
    select_nums = pair_num // 2 if pair_num else 0
    _ID = True  # store "label/name" ids instead of full paths
    image_list = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
    nums = len(image_list)
    print("have {} images and {} combinations".format(nums, nums * (nums - 1) / 2))
    pair_issame = []
    for image_path1, image_path2 in itertools.combinations(image_list, 2):
        name1 = os.path.basename(image_path1)
        name2 = os.path.basename(image_path2)
        # parent directory = identity label
        label1 = image_path1.split(os.sep)[-2]
        label2 = image_path2.split(os.sep)[-2]
        issame = 1 if label1 == label2 else 0
        if _ID:
            pair_issame.append([os.path.join(label1, name1), os.path.join(label2, name2), issame])
        else:
            pair_issame.append([image_path1, image_path2, issame])
    pair_issame = np.asarray(pair_issame)
    # sort so that issame==0 rows precede issame==1 rows deterministically
    pair_issame = pair_issame[np.lexsort(pair_issame.T)]
    pair_issame_0 = pair_issame[pair_issame[:, -1] == "0", :]
    pair_issame_1 = pair_issame[pair_issame[:, -1] == "1", :]
    num_pair_issame_1 = len(pair_issame_1)
    num_pair_issame_0 = len(pair_issame_0)
    if not select_nums:
        select_nums = num_pair_issame_1
    elif select_nums > num_pair_issame_1:
        raise Exception(
            "pair_nums({}) must be less than num_pair_issame_1({})".format(
                select_nums, num_pair_issame_1))
    # random balanced subsample of each class
    index_0 = np.random.permutation(num_pair_issame_0)[:select_nums]
    index_1 = np.random.permutation(num_pair_issame_1)[:select_nums]
    pair_issame_0 = pair_issame_0[index_0, :]
    pair_issame_1 = pair_issame_1[index_1, :]
    pair_issame = np.concatenate([pair_issame_0, pair_issame_1], axis=0)
    print("pair_issame_0 nums:{}".format(len(pair_issame_0)))
    print("pair_issame_1 nums:{}".format(len(pair_issame_1)))
    print("have {} pairs".format(len(pair_issame)))
    return pair_issame
def convert_images_format(image_dir, out_dir):
    """Re-save every *.bmp under ``image_dir`` as a jpg into ``out_dir``.

    :param image_dir: directory with *.bmp source images
    :param out_dir: destination directory (created if needed)
    :return: None
    """
    for src_path in file_processing.get_files_list(image_dir, postfix=["*.bmp"]):
        image = image_processing.read_image(src_path)
        # NOTE(review): replaces "bmp" anywhere in the file name, not only
        # the extension — presumably file stems never contain "bmp"; verify
        basename = os.path.basename(src_path).replace("bmp", "jpg")
        dest_path = file_processing.create_dir(out_dir, dir1=None, filename=basename)
        image_processing.save_image(dest_path, image, toUINT8=False)
def ramdom_select_image_dir(image_dir, dest_dir):
    """Copy up to 100 randomly chosen images per identity sub-directory.

    :param image_dir: root directory, one sub-directory per identity
    :param dest_dir: destination root (mirrors the identity structure)
    :return: None
    """
    select_nums = 100  # maximum images kept per identity
    for sub_id in file_processing.get_sub_directory_list(image_dir):
        candidates = file_processing.get_files_list(
            os.path.join(image_dir, sub_id),
            postfix=['*.jpg', "*.jpeg", '*.png', "*.JPG"])
        for src_path in np.random.permutation(candidates)[:select_nums]:
            basename = os.path.basename(src_path)
            dest_path = file_processing.create_dir(dest_dir, sub_id, basename)
            shutil.copy(src_path, dest_path)
def select_image_dir(image_dir, dest_dir):
    """Move images whose stem's second "_"-field is "0" into ``dest_dir``.

    :param image_dir: root directory, one sub-directory per identity
    :param dest_dir: destination root (mirrors the identity structure)
    :return: None
    """
    for sub_id in file_processing.get_sub_directory_list(image_dir):
        files = file_processing.get_files_list(
            os.path.join(image_dir, sub_id),
            postfix=['*.jpg', "*.jpeg", '*.png', "*.bmp"])
        for src_path in files:
            basename = os.path.basename(src_path)
            # assumes file stems look like "<name>_<index>" — TODO confirm
            if basename.split(".")[0].split("_")[1] == "0":
                dest_path = file_processing.create_dir(dest_dir, sub_id, basename)
                file_processing.move_file(src_path, dest_path)
def image_to_facebank(image_dir, dest_dir):
    """Copy each image into a per-person folder named by the pinyin of its stem.

    :param image_dir: flat directory of images, one per person
    :param dest_dir: facebank root; one sub-directory per pinyin id is created
    :return: None
    """
    from xpinyin import Pinyin
    pinyin = Pinyin()
    image_list = file_processing.get_files_list(
        image_dir, postfix=['*.jpg', "*.jpeg", '*.png', "*.JPG"])
    print("have ID:{}".format(len(image_list)))
    for image_path in image_list:
        basename = os.path.basename(image_path)
        # convert the (possibly Chinese) file stem to a pinyin identity name
        id_name = pinyin.get_pinyin(basename.split(".")[0], '')
        dest_path = file_processing.create_dir(dest_dir, id_name, basename)
        file_processing.copy_file(image_path, dest_path)
def convert_image_format(image_dir, dest_dir, resize_width=None, dest_format='.jpg'):
    """Re-encode every image of every identity sub-directory to ``dest_format``.

    :param image_dir: root directory, one sub-directory per identity
    :param dest_dir: destination root (mirrors the identity structure)
    :param resize_width: optional target width passed to the reader
    :param dest_format: destination extension, including the dot
    :return: None
    """
    for sub_id in file_processing.get_sub_directory_list(image_dir):
        files = file_processing.get_files_list(
            os.path.join(image_dir, sub_id),
            postfix=['*.jpg', "*.jpeg", '*.png', "*.JPG"])
        print("processing :{}".format(sub_id))
        for src_path in files:
            stem = os.path.basename(src_path).split('.')[0]
            # gbk-aware reader handles non-ascii paths
            image = image_processing.read_image_gbk(src_path, resize_width=resize_width)
            dest_path = file_processing.create_dir(dest_dir, sub_id, stem + dest_format)
            file_processing.create_file_path(dest_path)
            image_processing.save_image(dest_path, image)
def select_facebank_detect(image_dir, dest_dir, id_nums=None, detect_face=True):
    """Copy one image per identity into ``dest_dir``, optionally keeping only
    images on which exactly one face is detected.

    :param image_dir: root directory, one sub-directory per identity
    :param dest_dir: destination root (mirrors the identity structure)
    :param id_nums: cap on the number of identities processed (None = all)
    :param detect_face: run the face detector and skip multi/zero-face images
    :return: None
    """
    detector = None
    if detect_face:
        # NOTE(review): hard-coded local model path and cuda device —
        # adjust for the target machine
        model_path = "/home/panjinquan/project/python-learning-notes//libs/ultra_ligh_face/face_detection_rbf.pth"
        detector = UltraLightFaceDetector(model_path=model_path,
                                          network="RFB",
                                          confidence_threshold=0.85,
                                          nms_threshold=0.3,
                                          top_k=500,
                                          keep_top_k=750,
                                          device="cuda:0")
    per_nums = 1  # images kept per identity
    image_id = file_processing.get_sub_directory_list(image_dir)
    print("have ID:{}".format(len(image_id)))
    if id_nums:
        image_id = image_id[:min(id_nums, len(image_id))]
        print("select ID:{}".format(len(image_id)))
    for sub_id in image_id:
        files = file_processing.get_files_list(
            os.path.join(image_dir, sub_id),
            postfix=['*.jpg', "*.jpeg", '*.png', "*.JPG"])
        count = 0
        for src_path in files:
            basename = os.path.basename(src_path)
            if detect_face:
                bgr_image = cv2.imread(src_path)
                bboxes, scores, landms = detector.detect(bgr_image, isshow=True)
                # keep only images with exactly one detected face
                if len(bboxes) != 1:
                    print("no face:{}".format(src_path))
                    continue
            if count >= per_nums:
                break
            count += 1
            dest_path = file_processing.create_dir(dest_dir, sub_id, basename)
            file_processing.copy_file(src_path, dest_path)
def select_facebank(image_dir, dest_dir, id_nums=10):
    """Copy the first image of each identity sub-directory into ``dest_dir``.

    :param image_dir: root directory, one sub-directory per identity
    :param dest_dir: destination root (mirrors the identity structure)
    :param id_nums: cap on the number of identities processed (falsy = all)
    :return: None
    """
    per_nums = 1  # images kept per identity
    image_id = file_processing.get_sub_directory_list(image_dir)
    print("have ID:{}".format(len(image_id)))
    if id_nums:
        image_id = image_id[:min(id_nums, len(image_id))]
        print("select ID:{}".format(len(image_id)))
    for sub_id in image_id:
        files = file_processing.get_files_list(
            os.path.join(image_dir, sub_id),
            postfix=['*.jpg', "*.jpeg", '*.png', "*.JPG"])
        count = 0
        for src_path in files:
            if count >= per_nums:
                break
            count += 1
            basename = os.path.basename(src_path)
            dest_path = file_processing.create_dir(dest_dir, sub_id, basename)
            file_processing.copy_file(src_path, dest_path)
def face_body_for_image(shuffle=False):
    """Convert the face_body SSD trainval set to text labels plus id lists.

    Splits the images 95%/5% into train/val, converts them with
    ``convert_facebody_to_textdataset``, saves the train/val id files and
    finally runs a batch label check.

    :param shuffle: shuffle the image list (deterministically) before splitting
    :return: None
    """
    out_train_val_path = "/media/dm/dm2/project/dataset/face_body/SSD"
    annotations_dir = '/media/dm/dm2/project/dataset/face_body/face_body_dataset/Annotations'
    label_out_dir = "/media/dm/dm2/project/dataset/face_body/SSD/label"
    image_dir = "/media/dm/dm2/project/dataset/face_body/SSD/trainval"

    image_list = file_processing.get_files_list(image_dir, postfix=["*.jpg"])
    print("have {} images".format(len(image_list)))
    if shuffle:
        # fixed seed keeps the train/val split reproducible across runs
        random.seed(100)
        random.shuffle(image_list)

    # 95% train / 5% val split
    split = int(0.95 * len(image_list))
    train_image_id = convert_facebody_to_textdataset(image_list[:split], annotations_dir,
                                                     label_out_dir, classes,
                                                     coordinatesType="SSD", show=False)
    val_image_id = convert_facebody_to_textdataset(image_list[split:], annotations_dir,
                                                   label_out_dir, classes,
                                                   coordinatesType="SSD", show=False)
    print("done...ok!")

    # persist the image-id lists
    train_id_path = os.path.join(out_train_val_path, "train.txt")
    val_id_path = os.path.join(out_train_val_path, "val.txt")
    comment.save_id(train_id_path, train_image_id, val_id_path, val_image_id)
    batch_label_test(label_out_dir, image_dir, classes)
def rename_image_dir(dataset_dir, prefix="ID", add_sub=False):
    """Rename every image under ``dataset_dir`` to "<prefix>[_<sub>]_<NNNNN>.<ext>".

    The 5-digit index starts at 0 and is incremented until the candidate
    name does not collide with an existing file in the same directory.

    :param dataset_dir: root directory scanned recursively for *.jpg / *.png
    :param prefix: leading token of the new file name
    :param add_sub: also embed the parent directory name in the new name
    :return: None
    """
    image_list = file_processing.get_files_list(dataset_dir, postfix=['*.jpg', "*.png"])
    for image_path in image_list:
        ext = os.path.basename(image_path).split(".")[-1]
        dirname = os.path.dirname(image_path)
        sub = image_path.split(os.sep)[-2]  # parent directory name
        base_parts = [prefix]
        if add_sub:
            base_parts.append(sub)
        # BUGFIX: the original joined the parts into a str, then did
        # `newName += [...]` inside the collision loop — str + list raises
        # TypeError on the first collision (and a list would have kept
        # accumulating suffixes). Rebuild the candidate name each pass.
        index = 0
        new_name = "_".join(base_parts + ['{:0=5}.{}'.format(index, ext)])
        newpath = os.path.join(dirname, new_name)
        while os.path.exists(newpath):
            index += 1
            new_name = "_".join(base_parts + ['{:0=5}.{}'.format(index, ext)])
            newpath = os.path.join(dirname, new_name)
        print(image_path)
        print(new_name)
        os.rename(image_path, newpath)
def detect_image_dir(self, image_dir):
    """Run ``self.detect`` with display enabled on every *.jpg in ``image_dir``.

    :param image_dir: directory containing *.jpg images
    :return: None
    """
    for image_path in file_processing.get_files_list(image_dir, postfix=["*.jpg"]):
        image = cv2.imread(image_path)
        self.detect(image, ishow=True)
def batch_label_test(label_dir, image_dir, classes):
    """Run ``label_test`` on every label txt file under ``label_dir``.

    :param label_dir: directory containing the converted *.txt label files
    :param image_dir: directory containing the matching images
    :param classes: class names forwarded to label_test
    :return: None
    """
    # BUGFIX: every other get_files_list call in this file uses a glob
    # pattern ("*.jpg", "*.xml", ...); the bare ".txt" here would not
    # match that convention, so no label file was ever tested.
    file_list = file_processing.get_files_list(label_dir, postfix=["*.txt"])
    for filename in file_list:
        label_test(image_dir, filename, class_names=classes)