def move_test_data_set(dir):
    """Move the test split of a VOC data set into ``<dir>/test_part``.

    The front names (file names without extension) are read from
    ``<dir>/ImageSets/Main/test.txt``. For each of them the paired image and
    annotation are moved from ``<dir>/JPEGImages`` and ``<dir>/Annotations``
    into the sub-folders of the same name below ``<dir>/test_part``.

    Args:
        dir: root folder of the VOC data set.

    Returns:
        None. Returns early when the test list is empty or when the target
        folders do not exist after trying to create them.
    """
    # read test data list
    front_name_set = set(
        pb.scan_text(os.path.join(dir, 'ImageSets', 'Main', 'test.txt')))
    if not front_name_set:
        return

    # make save path
    save_root_path = os.path.join(dir, 'test_part')
    save_jpg_path = os.path.join(save_root_path, 'JPEGImages')
    save_xml_path = os.path.join(save_root_path, 'Annotations')
    pb.makedirs(save_jpg_path)
    pb.makedirs(save_xml_path)
    if not (os.path.isdir(save_jpg_path) and os.path.isdir(save_xml_path)):
        print('Move test data to {0} is not allowed'.format(save_root_path))
        # Without the target folders shutil.move would rename the files
        # instead of moving them into a directory, so stop here.
        return

    # read all voc data list
    jpg_path = os.path.join(dir, 'JPEGImages')
    xml_path = os.path.join(dir, 'Annotations')
    img_exts = '.jpg.jpeg.png.JPG.JPEG.PNG'
    pairs, _, _ = pb.scan_pair(jpg_path, xml_path, img_exts, '.xml',
                               with_root_dir=False,
                               with_ext=True)
    pairs_front_name = {pb.splitext(p[0])[0]: p for p in pairs}

    # moving
    print('Moving test data to {0} ......'.format(save_root_path))
    missing = []
    for front_name in tqdm.tqdm(front_name_set):
        pair = pairs_front_name.get(front_name)
        if pair is None:
            # Listed in test.txt but no image/annotation pair on disk
            # (e.g. the split was already moved by an earlier run).
            missing.append(front_name)
            continue
        jpg_full_path = os.path.join(jpg_path, pair[0])
        # The annotation lives in the Annotations folder, not in JPEGImages.
        xml_full_path = os.path.join(xml_path, pair[1])
        shutil.move(jpg_full_path, save_jpg_path)
        shutil.move(xml_full_path, save_xml_path)
    if missing:
        print('{0} names in test.txt have no image/annotation pair '
              'and were skipped'.format(len(missing)))
def draw_voc(voc_root_path, save_root_path=None):
    """Draw every annotated bounding box and its label onto the images.

    Image/annotation pairs are scanned from ``<voc_root_path>/JPEGImages``
    and ``<voc_root_path>/Annotations``. Each image is saved, with boxes and
    label texts drawn on it, under its original file name in
    ``save_root_path``.

    Args:
        voc_root_path: root folder of the VOC data set.
        save_root_path: output folder; defaults to ``<voc_root_path>/draw``.

    Returns:
        None.
    """
    jpeg_path = os.path.join(voc_root_path, 'JPEGImages')
    anno_path = os.path.join(voc_root_path, 'Annotations')
    pairs, others_in_jpeg, others_in_anno = pb.scan_pair(
        jpeg_path, anno_path, '.jpg.jpeg.JPG.JPEG', '.xml', True, True)
    if save_root_path is None:
        save_root_path = os.path.join(voc_root_path, 'draw')
    pb.makedirs(save_root_path)

    # TODO: to speed up
    label_color = {}
    # Fixed seed so every run assigns the same colour sequence to new labels.
    random.seed(10086)
    for img_path, xml_path in tqdm.tqdm(pairs, ncols=55):
        # read
        xml = pb.voc.xml_read(xml_path)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Fail to read: "{0}"'.format(img_path))
            continue
        img_full_name = os.path.basename(img_path)
        img_save_path = os.path.join(save_root_path, img_full_name)
        for obj in xml.objs:
            label = obj.name
            color = label_color.get(label)
            if color is None:
                color = (random.randint(0, 255),
                         random.randint(0, 255),
                         random.randint(0, 255))
                label_color[label] = color
            cv2.rectangle(img,
                          (int(obj.xmin), int(obj.ymin)),
                          (int(obj.xmax), int(obj.ymax)),
                          color, 3)
            # The label text is anchored at the centre of the box.
            center = (int(obj.xmin * 0.5 + obj.xmax * 0.5),
                      int(obj.ymin * 0.5 + obj.ymax * 0.5))
            cv2.putText(img, label, center,
                        cv2.FONT_HERSHEY_COMPLEX, 1, color, 3)
        cv2.imwrite(img_save_path, img)
def split_voc_data_set(dir, train, val, test=None):
    """Randomly split a VOC data set and write the ImageSets/Main lists.

    The annotation front names are shuffled and written to ``trainval.txt``,
    ``train.txt``, ``val.txt`` and ``test.txt`` in ``<dir>/ImageSets/Main``.

    Args:
        dir: root folder of the VOC data set.
        train: fraction of the samples that goes into the train list.
        val: fraction of the samples that goes into the val list.
        test: not used; the test list is whatever remains after train and
            val have been taken.

    Returns:
        Tuple ``(len_train, len_val, len_test)`` with the number of names
        actually written to each list.
    """
    # read list
    pairs, _, _ = pb.scan_pair(os.path.join(dir, 'JPEGImages'),
                               os.path.join(dir, 'Annotations'),
                               '.jpg.jpeg.png',
                               '.xml',
                               with_root_dir=False,
                               with_ext=True)
    names = [os.path.splitext(x[1])[0] for x in pairs]

    # random split
    random.shuffle(names)
    len_names = len(names)
    len_train = int(len_names * train)
    len_val = int(len_names * val)
    train_list = names[:len_train]
    val_list = names[len_train:len_train + len_val]
    trainval_list = names[:len_train + len_val]
    test_list = names[len_train + len_val:]

    # write file
    save_dir = os.path.join(dir, 'ImageSets', 'Main')
    pb.makedirs(save_dir)
    __write_split_file(os.path.join(save_dir, 'trainval.txt'), trainval_list)
    __write_split_file(os.path.join(save_dir, 'train.txt'), train_list)
    __write_split_file(os.path.join(save_dir, 'val.txt'), val_list)
    __write_split_file(os.path.join(save_dir, 'test.txt'), test_list)

    # Report the real list sizes: when train + val exceeds 1 the slices are
    # truncated, and "total - train - val" would be a negative test size.
    return len(train_list), len(val_list), len(test_list)
def resize_voc(args):
    """Resize all images of a VOC data set together with their annotations.

    Args:
        args: namespace providing
            ``dir``   - root folder of the VOC data set,
            ``save``  - output root folder, or None to overwrite in place,
            ``quiet`` - when equal to ``'NOT_MENTIONED'`` and ``save`` is
                        None, the user is asked to confirm overwriting,
            ``wh``    - JSON text of the target size; element 0 is compared
                        with the image width, element 1 with its height,
            ``rtype`` / ``inter`` - forwarded to the ``pb.voc`` resize
                        helpers.

    Returns:
        None. Exits the process when the user declines the confirmation.
    """
    print(
        'Warning: Repeated encoding of images can lead to loss of information.'
    )
    if args.save is None and args.quiet == 'NOT_MENTIONED':
        print('You are going to cover the source files. Continue? [Y/N]?')
        str_in = input()
        if str_in != 'Y' and str_in != 'y':
            sys.exit('user quit')

    jpg_dir = os.path.join(args.dir, 'JPEGImages')
    xml_dir = os.path.join(args.dir, 'Annotations')
    relative_pairs, _, _ = pb.scan_pair(jpg_dir, xml_dir,
                                        '.jpg.jpeg.JPG.JPEG', '.xml', False,
                                        True)
    wh = tuple(json.loads(args.wh))
    voc_rszr = pb.voc.vocResizer(args.rtype, wh, args.inter)
    read_jpg_list = [os.path.join(jpg_dir, x[0]) for x in relative_pairs]
    read_xml_list = [os.path.join(xml_dir, x[1]) for x in relative_pairs]
    total = len(relative_pairs)

    if args.save is None:
        # save in local and cover files
        for imgrp, xmlrp in tqdm.tqdm(zip(read_jpg_list, read_xml_list),
                                      total=total, ncols=55):
            img = cv2.imread(imgrp, cv2.IMREAD_UNCHANGED)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                print('Fail to read: "{0}"'.format(imgrp))
                continue
            if img.shape[1] == wh[0] and img.shape[0] == wh[1]:
                # Already at the target size: nothing to rewrite.
                continue
            xml = pb.voc.xml_read(xmlrp)
            rimg, rxml = pb.voc.vocResize(img,
                                          xml,
                                          wh,
                                          interpolation=args.inter,
                                          rtype=args.rtype)
            # Overwrite the source files themselves.
            cv2.imwrite(imgrp, rimg)
            pb.voc.xml_write(xmlrp, rxml)
    else:
        # save in another folder
        save_jpg_dir = os.path.join(args.save, 'JPEGImages')
        save_xml_dir = os.path.join(args.save, 'Annotations')
        pb.makedirs(save_jpg_dir)
        pb.makedirs(save_xml_dir)
        save_jpg_list = [
            os.path.join(save_jpg_dir, x[0]) for x in relative_pairs
        ]
        save_xml_list = [
            os.path.join(save_xml_dir, x[1]) for x in relative_pairs
        ]
        for imgrp, xmlrp, imgsp, xmlsp in tqdm.tqdm(
                zip(read_jpg_list, read_xml_list, save_jpg_list,
                    save_xml_list),
                total=total, ncols=55):
            img = cv2.imread(imgrp, cv2.IMREAD_UNCHANGED)
            if img is None:
                print('Fail to read: "{0}"'.format(imgrp))
                continue
            if img.shape[1] == wh[0] and img.shape[0] == wh[1]:
                # Copy untouched to avoid a lossy re-encode.
                shutil.copy(imgrp, imgsp)
                shutil.copy(xmlrp, xmlsp)
            else:
                rimg = voc_rszr.imResize(img)
                cv2.imwrite(imgsp, rimg)
                voc_rszr.xmlResizeInDisk(xmlrp, xmlsp)
def cut_voc(voc_root_path, rate=None):
    """Crop every annotated object and save it in a per-label folder.

    Crops are written to ``<voc_root_path>/cutVoc/<label>/`` and named
    ``<image front name>_<index>.jpg``, where the index starts at 10000 for
    every image and increases by one per object.

    Args:
        voc_root_path: root folder of the VOC data set.
        rate: fraction of the box width/height added on every side before
            cropping (clipped to the image); None means 0.

    Returns:
        None.
    """
    rate = 0 if rate is None else float(rate)
    jpeg_path = os.path.join(voc_root_path, 'JPEGImages')
    anno_path = os.path.join(voc_root_path, 'Annotations')
    pairs, others_in_jpeg, others_in_anno = pb.scan_pair(
        jpeg_path, anno_path, '.jpg.jpeg.JPG.JPEG', '.xml', True, True)
    save_root_path = os.path.join(voc_root_path, 'cutVoc')

    # TODO: to speed up
    save_path_dict = {}  # label -> output folder that was already created
    for img_path, xml_path in tqdm.tqdm(pairs, ncols=55):
        # read
        xml = pb.voc.xml_read(xml_path)
        if len(xml.objs) == 0:
            continue
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Fail to read: "{0}"'.format(img_path))
            continue

        # cut and save
        objs_index = 10000
        img_front_name = pb.splitext(os.path.basename(xml_path))[0]
        for obj in xml.objs:
            obj_save_path = save_path_dict.get(obj.name)
            if obj_save_path is None:
                obj_save_path = os.path.join(save_root_path, obj.name)
                pb.makedirs(obj_save_path)
                save_path_dict[obj.name] = obj_save_path

            # broaden bndbox, clipped to the image borders
            obj_w = obj.xmax + 1 - obj.xmin
            obj_h = obj.ymax + 1 - obj.ymin
            xmin = max(0, int(obj.xmin - obj_w * rate))
            ymin = max(0, int(obj.ymin - obj_h * rate))
            xmax = min(img.shape[1] - 1, int(obj.xmax + obj_w * rate))
            ymax = min(img.shape[0] - 1, int(obj.ymax + obj_h * rate))
            img_obj = img[ymin:ymax + 1, xmin:xmax + 1]

            save_full_name = '{0}_{1}.jpg'.format(img_front_name, objs_index)
            obj_save_full_path = os.path.join(obj_save_path, save_full_name)
            # An empty crop (box outside the image) cannot be encoded, so it
            # is reported as a failure instead of being passed to imwrite.
            if img_obj.size == 0 or not cv2.imwrite(obj_save_full_path,
                                                    img_obj):
                print('Fail to save: \"{0}\", shape={1}'.format(
                    obj_save_full_path, img_obj.shape))
            objs_index += 1
def count_voc(voc_root_path):
    """Write label statistics of a VOC data set to ``countVoc.txt``.

    The report, stored in ``voc_root_path``, lists the number of
    image/annotation pairs, the total number of bounding boxes and the
    number of boxes per label (labels in sorted order).

    Args:
        voc_root_path: root folder of the VOC data set.

    Returns:
        None.
    """
    separator = '******************************************************\n'
    row = '{0:<40} = {1:>6}\n'

    image_dir = os.path.join(voc_root_path, 'JPEGImages')
    xml_dir = os.path.join(voc_root_path, 'Annotations')
    pairs, _unpaired_images, _unpaired_xmls = pb.scan_pair(
        image_dir, xml_dir, '.jpg.jpeg', '.xml', True, True)

    report_path = os.path.join(voc_root_path, 'countVoc.txt')
    with open(report_path, 'w') as report:
        report.write(separator)
        report.write(row.format('effective data', len(pairs)))

        # Tally the boxes per label over all annotation files.
        boxes_per_label = {}
        for _image_path, xml_path in pairs:
            for obj in pb.voc.xml_read(xml_path).objs:
                boxes_per_label[obj.name] = boxes_per_label.get(obj.name, 0) + 1
        total_boxes = sum(boxes_per_label.values())

        report.write(row.format('effective bndbox', total_boxes))
        report.write(separator)
        for label in sorted(boxes_per_label):
            report.write(row.format(label, boxes_per_label[label]))
    return
def move_voc(source_dir, begin, args_format, args_quiet):
    """Move a VOC data set and rename its files after a name format.

    ``args_format`` is split into an optional target folder and a name
    format. The new names come from
    ``make_move_list(name_format, begin, <number of pairs>)``. Every
    image/annotation pair of ``source_dir`` is moved into the target folder
    (``source_dir`` itself when no folder is given) under its new name, and
    the ``filename`` field of each annotation is set to the new name.

    Args:
        source_dir: root folder of the VOC data set to move.
        begin: forwarded to ``make_move_list``.
        args_format: ``[<target folder>/]<name format>``.
        args_quiet: when truthy and no target folder is given, the user is
            asked to confirm renaming in place.
            NOTE(review): the parameter name suggests the opposite polarity
            (prompt unless quiet) - confirm against the caller.

    Returns:
        None. Exits the process when the user declines the confirmation.
    """
    save_dir, name_format = os.path.split(args_format)
    if save_dir == '' and args_quiet:
        print('You are going to cover the source voc. Continue? [Y/N]?')
        str_in = input()
        if str_in != 'Y' and str_in != 'y':
            sys.exit('user quit')
    save_dir = source_dir if save_dir == '' else save_dir
    save_jpg_dir = os.path.join(save_dir, 'JPEGImages')
    save_xml_dir = os.path.join(save_dir, 'Annotations')
    pb.makedirs(save_jpg_dir)
    pb.makedirs(save_xml_dir)
    jpg_dir = os.path.join(source_dir, 'JPEGImages')
    xml_dir = os.path.join(source_dir, 'Annotations')

    # scan
    relative_pairs, o1, o2 = pb.scan_pair(jpg_dir, xml_dir,
                                          '.jpg.jpeg.JPG.JPEG', '.xml', False,
                                          True)
    if len(o1) != 0 or len(o2) != 0:
        print('There are other files not voc type...')
        for x in o1:
            print(x)
        for x in o2:
            print(x)
    name_list = make_move_list(name_format, begin, len(relative_pairs))
    if len(name_list) == 0:
        return

    # Pass 1: move every pair to a random temporary name in the target
    # folders, so that a final name can never overwrite a source file that
    # is still waiting to be renamed.
    staged = []  # (tmp jpg path, tmp xml path, jpg ext, xml ext)
    for jpg_full_name, xml_full_name in relative_pairs:
        jpg_ext = pb.splitext(jpg_full_name)[1]
        jpg_tmp_path = _move_voc_tmp_path(save_jpg_dir, jpg_ext)
        shutil.move(os.path.join(jpg_dir, jpg_full_name), jpg_tmp_path)

        xml_ext = pb.splitext(xml_full_name)[1]
        xml_tmp_path = _move_voc_tmp_path(save_xml_dir, xml_ext)
        shutil.move(os.path.join(xml_dir, xml_full_name), xml_tmp_path)

        staged.append((jpg_tmp_path, xml_tmp_path, jpg_ext, xml_ext))

    # Pass 2: give every staged pair its final name.
    for (jpg_tmp_path, xml_tmp_path, jpg_ext, xml_ext), name in zip(
            staged, name_list):
        jpg_save_path = os.path.join(save_jpg_dir,
                                     '{0}{1}'.format(name, jpg_ext))
        shutil.move(jpg_tmp_path, jpg_save_path)

        xml_save_path = os.path.join(save_xml_dir,
                                     '{0}{1}'.format(name, xml_ext))
        # The annotation is rewritten rather than moved because its
        # filename field has to carry the new name.
        xml = pb.voc.xml_read(xml_tmp_path)
        xml.filename = name
        pb.voc.xml_write(xml_save_path, xml)
        os.remove(xml_tmp_path)

    # Best-effort cleanup: os.rmdir only removes empty folders, so the
    # source folders stay in place when they still contain files.
    for folder in (jpg_dir, xml_dir, source_dir):
        try:
            os.rmdir(folder)
        except OSError:
            pass


def _move_voc_tmp_path(directory, ext):
    """Return a random file path in *directory* ending in *ext* that is not an existing file."""
    while True:
        candidate = os.path.join(
            directory, '{0}{1}'.format(str(random.random())[2:], ext))
        if not os.path.isfile(candidate):
            return candidate
def check_bndbox(voc_root_path, to_check_size):
    """Clean up a VOC data set and adjust its bounding boxes.

    Steps:
      1. Unpaired files and sub-folders found in ``JPEGImages`` and
         ``Annotations`` are moved to ``<voc_root_path>/others``.
      2. When ``to_check_size`` is set, the image shape is compared with the
         height/width/depth stored in the annotation; mismatching (or
         unreadable) images are recorded and not processed further.
      3. ``pb.voc.adjust_bndbox`` is applied to every remaining annotation.
         Pairs left without any object are moved to ``others``; annotations
         that were changed are written back.
      4. Size errors are printed (first three) and all of them are saved to
         ``<voc_root_path>/size_error_list.txt``.

    Args:
        voc_root_path: root folder of the VOC data set.
        to_check_size: whether to compare image and annotation sizes.

    Returns:
        True when no size error was found, otherwise False.
    """
    # read voc data list
    jpeg_path = os.path.join(voc_root_path, 'JPEGImages')
    anno_path = os.path.join(voc_root_path, 'Annotations')
    pairs, others_in_jpeg, others_in_anno = pb.scan_pair(
        jpeg_path, anno_path, '.jpg.jpeg.JPG.JPEG', '.xml', True, True)

    # make move path and move other files
    move_dir = os.path.join(voc_root_path, 'others')
    move_jpeg_dir = os.path.join(move_dir, 'JPEGImages')
    move_anno_dir = os.path.join(move_dir, 'Annotations')
    pb.makedirs(move_jpeg_dir)
    pb.makedirs(move_anno_dir)
    for x in others_in_jpeg:
        shutil.move(x, move_jpeg_dir)
    for x in others_in_anno:
        shutil.move(x, move_anno_dir)
    for f in pb.scan_folder(jpeg_path):
        shutil.move(os.path.join(jpeg_path, f), move_jpeg_dir)
    for f in pb.scan_folder(anno_path):
        shutil.move(os.path.join(anno_path, f), move_anno_dir)

    # adjust bndbox
    bad_img_size_list = []  # (image shape or None, xml, xml path)
    for img_path, xml_path in tqdm.tqdm(pairs, ncols=55):
        # read xml
        xml = pb.voc.xml_read(xml_path)

        # check whether size matched
        if to_check_size:
            img = cv2.imread(img_path)
            # cv2.imread returns None for an unreadable file; report it as
            # a size error instead of crashing on ``.shape``.
            img_shape = None if img is None else img.shape
            if (img_shape is None or img_shape[0] != xml.height
                    or img_shape[1] != xml.width):
                bad_img_size_list.append((img_shape, xml, xml_path))
                continue
            if len(img_shape) == 3 and img_shape[2] != xml.depth:
                bad_img_size_list.append((img_shape, xml, xml_path))
                continue

        # check voc; presumably adjust_bndbox returns 0 when it changed
        # nothing - confirm against pb.voc
        no_change = pb.voc.adjust_bndbox(xml) == 0

        # move no obj or save changed
        if len(xml.objs) == 0:
            # no obj
            shutil.move(img_path, move_jpeg_dir)
            shutil.move(xml_path, move_anno_dir)
        elif not no_change:
            # is changed
            pb.voc.xml_write(xml_path, xml)

    # rmdir if is empty (os.rmdir refuses to remove non-empty folders)
    for folder in (move_jpeg_dir, move_anno_dir, move_dir):
        try:
            os.rmdir(folder)
        except OSError:
            pass

    def describe(entry):
        """Format one size error as a single log line."""
        img_shape, xml, xml_path = entry
        return '{0} img={1} vs xml=({2}, {3}, {4}).'.format(
            os.path.basename(xml_path),
            img_shape,
            xml.height, xml.width, xml.depth)

    if bad_img_size_list:
        # print bad size images
        print('There are {0} images\' size not matched, such as:'.format(
            len(bad_img_size_list)))
        for entry in bad_img_size_list[:3]:
            print(describe(entry))
        if len(bad_img_size_list) > 3:
            print('See others in \"size_error_list.txt\"')
        print('deal with the size errors and check again.')

        # save error list; the file holds every entry, not only the three
        # that were printed
        with open(os.path.join(voc_root_path, 'size_error_list.txt'),
                  'w') as fp:
            for entry in bad_img_size_list:
                fp.write(describe(entry) + '\n')
    return not bad_img_size_list