Exemple #1
0
def move_test_data_set(dir):
    """Move the test split of a VOC data set into ``<dir>/test_part``.

    The front names (file names without extension) are read from
    ``<dir>/ImageSets/Main/test.txt``.  For every listed name the matching
    image in ``<dir>/JPEGImages`` and annotation in ``<dir>/Annotations`` are
    moved to ``<dir>/test_part/JPEGImages`` and
    ``<dir>/test_part/Annotations`` respectively.

    Args:
        dir: Root directory of the VOC data set.

    Returns:
        None.  Nothing is moved when the test list is empty or when the
        destination folders could not be created.
    """
    # read test data list
    front_name_set = set(
        pb.scan_text(os.path.join(dir, 'ImageSets', 'Main', 'test.txt')))
    if not front_name_set:
        return

    # make save path
    save_root_path = os.path.join(dir, 'test_part')
    save_jpg_path = os.path.join(save_root_path, 'JPEGImages')
    save_xml_path = os.path.join(save_root_path, 'Annotations')
    pb.makedirs(save_jpg_path)
    pb.makedirs(save_xml_path)
    if not (os.path.isdir(save_jpg_path) and os.path.isdir(save_xml_path)):
        print('Move test data to {0} is not allowed'.format(save_root_path))
        # Without usable destination folders shutil.move would rename the
        # sources to plain files called "JPEGImages"/"Annotations"; stop here.
        return

    # read all voc data list
    jpg_path = os.path.join(dir, 'JPEGImages')
    xml_path = os.path.join(dir, 'Annotations')
    img_exts = '.jpg.jpeg.png.JPG.JPEG.PNG'
    pairs, _, _ = pb.scan_pair(jpg_path, xml_path, img_exts, '.xml',
                               with_root_dir=False,
                               with_ext=True)
    # front name -> (image file name, annotation file name)
    pairs_front_name = {pb.splitext(p[0])[0]: p for p in pairs}

    # moving
    print('Moving test data to {0} ......'.format(save_root_path))
    for front_name in tqdm.tqdm(front_name_set):
        pair = pairs_front_name.get(front_name)
        if pair is None:
            # Listed in test.txt but no image/annotation pair on disk
            # (e.g. already moved by a previous run): skip instead of crashing.
            print('Skip "{0}": no image/annotation pair found'.format(
                front_name))
            continue
        jpg_full_path = os.path.join(jpg_path, pair[0])
        # The annotation lives in the Annotations folder, not in JPEGImages.
        xml_full_path = os.path.join(xml_path, pair[1])
        shutil.move(jpg_full_path, save_jpg_path)
        shutil.move(xml_full_path, save_xml_path)
    return
Exemple #2
0
def draw_voc(voc_root_path, save_root_path=None):
    """Draw every annotated bounding box onto its image and save the result.

    Each image/annotation pair found under ``<voc_root_path>/JPEGImages`` and
    ``<voc_root_path>/Annotations`` is read, a rectangle and the label text
    are drawn for every object, and the image is written under the same file
    name into ``save_root_path``.

    Args:
        voc_root_path: Root directory of the VOC data set.
        save_root_path: Output directory.  Defaults to
            ``<voc_root_path>/draw`` when None.

    Returns:
        None.
    """
    jpeg_path = os.path.join(voc_root_path, 'JPEGImages')
    anno_path = os.path.join(voc_root_path, 'Annotations')
    pairs, _, _ = pb.scan_pair(
        jpeg_path, anno_path, '.jpg.jpeg.JPG.JPEG', '.xml', True, True)
    if save_root_path is None:
        save_root_path = os.path.join(voc_root_path, 'draw')
    # Create the output folder for caller-supplied paths as well; cv2.imwrite
    # does not create missing directories.
    pb.makedirs(save_root_path)
    #TODO: to speed up
    label_color = {}
    # Fixed seed so the colors drawn are reproducible between runs.
    random.seed(10086)
    for img_path, xml_path in tqdm.tqdm(pairs, ncols=55):
        #read
        xml = pb.voc.xml_read(xml_path)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for unreadable/corrupt files.
            print('Fail to read: "{0}"'.format(img_path))
            continue
        img_full_name = os.path.basename(img_path)
        img_save_path = os.path.join(save_root_path, img_full_name)
        for obj in xml.objs:
            label = obj.name
            color = label_color.get(label)
            if color is None:
                # First time this label is seen: pick its color.
                color = (random.randint(0, 255), random.randint(0, 255),
                         random.randint(0, 255))
                label_color[label] = color
            top_left = (int(obj.xmin), int(obj.ymin))
            bottom_right = (int(obj.xmax), int(obj.ymax))
            # Label text is anchored at the center of the box.
            center = (int(obj.xmin * 0.5 + obj.xmax * 0.5),
                      int(obj.ymin * 0.5 + obj.ymax * 0.5))
            cv2.rectangle(img, top_left, bottom_right, color, 3)
            cv2.putText(img, label, center, cv2.FONT_HERSHEY_COMPLEX, 1,
                        color, 3)
        cv2.imwrite(img_save_path, img)
    return
Exemple #3
0
def split_voc_data_set(dir, train, val, test=None):
    """Randomly split a VOC data set and write the ImageSets/Main lists.

    The annotation file names of all image/annotation pairs are shuffled and
    cut into a train part, a val part and a test part (whatever remains).
    ``trainval.txt``, ``train.txt``, ``val.txt`` and ``test.txt`` are written
    to ``<dir>/ImageSets/Main``.

    Args:
        dir: Root directory of the VOC data set.
        train: Fraction of the samples assigned to the train list.
        val: Fraction of the samples assigned to the val list.
        test: Not used by this function; the test list always receives the
            samples left over after train and val.

    Returns:
        Tuple ``(len_train, len_val, len_test)`` with the size of each part.
    """
    # read list
    image_dir = os.path.join(dir, 'JPEGImages')
    anno_dir = os.path.join(dir, 'Annotations')
    pairs, o1, o2 = pb.scan_pair(image_dir, anno_dir, '.jpg.jpeg.png', '.xml',
                                 with_root_dir=False, with_ext=True)
    # drop the last four characters (the '.xml' extension)
    names = [pair[1][:-4] for pair in pairs]

    # random split
    random.shuffle(names)
    total = len(names)
    n_train = int(total * train)
    n_val = int(total * val)
    n_trainval = n_train + n_val
    n_test = total - n_trainval

    # write file
    save_dir = os.path.join(dir, 'ImageSets', 'Main')
    pb.makedirs(save_dir)
    splits = (
        ('trainval.txt', names[:n_trainval]),
        ('train.txt', names[:n_train]),
        ('val.txt', names[n_train:n_trainval]),
        ('test.txt', names[n_trainval:]),
    )
    for file_name, subset in splits:
        __write_split_file(os.path.join(save_dir, file_name), subset)

    return n_train, n_val, n_test
Exemple #4
0
def resize_voc(args):
    """Resize every image of a VOC data set together with its annotation.

    Args:
        args: Parsed command-line arguments.  The attributes read here are
            ``dir`` (VOC root), ``save`` (output root, or None to overwrite
            the source files), ``quiet`` (the overwrite confirmation is asked
            only when it equals ``'NOT_MENTIONED'``), ``wh`` (JSON text of the
            target ``[width, height]``), ``rtype`` and ``inter`` (forwarded to
            the ``pb.voc`` resize helpers).

    Returns:
        None.  Exits the process via ``sys.exit`` when the user declines the
        overwrite confirmation.
    """
    print(
        'Warning: Repeated encoding of images can lead to loss of information.'
    )
    if args.save is None and args.quiet == 'NOT_MENTIONED':
        print('You are going to cover the source files. Continue? [Y/N]?')
        str_in = input()
        if str_in not in ('Y', 'y'):
            sys.exit('user quit')

    jpg_dir = os.path.join(args.dir, 'JPEGImages')
    xml_dir = os.path.join(args.dir, 'Annotations')

    relative_pairs, o1, o2 = pb.scan_pair(jpg_dir, xml_dir,
                                          '.jpg.jpeg.JPG.JPEG', '.xml', False,
                                          True)

    wh = tuple(json.loads(args.wh))
    voc_rszr = pb.voc.vocResizer(args.rtype, wh, args.inter)
    read_jpg_list = [os.path.join(jpg_dir, x[0]) for x in relative_pairs]
    read_xml_list = [os.path.join(xml_dir, x[1]) for x in relative_pairs]
    # zip() has no length, so tell tqdm the total explicitly.
    total = len(relative_pairs)

    # save in local and cover files
    if args.save is None:
        for imgrp, xmlrp in tqdm.tqdm(zip(read_jpg_list, read_xml_list),
                                      total=total,
                                      ncols=55):
            # ncols is a tqdm option; cv2.imread only takes path and flags.
            img = cv2.imread(imgrp, cv2.IMREAD_UNCHANGED)
            if img is None:
                # cv2.imread returns None for unreadable/corrupt files.
                print('Fail to read: "{0}"'.format(imgrp))
                continue
            if img.shape[1] == wh[0] and img.shape[0] == wh[1]:
                # already at the target size: nothing to rewrite
                continue
            xml = pb.voc.xml_read(xmlrp)
            rimg, rxml = pb.voc.vocResize(img,
                                          xml,
                                          wh,
                                          interpolation=args.inter,
                                          rtype=args.rtype)
            # In-place mode: the results overwrite the files just read.
            cv2.imwrite(imgrp, rimg)
            pb.voc.xml_write(xmlrp, rxml)
    # save in another folder
    else:
        save_jpg_dir = os.path.join(args.save, 'JPEGImages')
        save_xml_dir = os.path.join(args.save, 'Annotations')
        pb.makedirs(save_jpg_dir)
        pb.makedirs(save_xml_dir)
        save_jpg_list = [
            os.path.join(save_jpg_dir, x[0]) for x in relative_pairs
        ]
        save_xml_list = [
            os.path.join(save_xml_dir, x[1]) for x in relative_pairs
        ]
        for imgrp, xmlrp, imgsp, xmlsp in tqdm.tqdm(zip(
                read_jpg_list, read_xml_list, save_jpg_list, save_xml_list),
                                                    total=total,
                                                    ncols=55):
            img = cv2.imread(imgrp, cv2.IMREAD_UNCHANGED)
            if img is None:
                print('Fail to read: "{0}"'.format(imgrp))
                continue
            if img.shape[1] == wh[0] and img.shape[0] == wh[1]:
                # already at the target size: copy untouched to avoid
                # another lossy re-encode
                shutil.copy(imgrp, imgsp)
                shutil.copy(xmlrp, xmlsp)
            else:
                rimg = voc_rszr.imResize(img)
                cv2.imwrite(imgsp, rimg)
                voc_rszr.xmlResizeInDisk(xmlrp, xmlsp)
    return
Exemple #5
0
def cut_voc(voc_root_path, rate=None):
    """Crop every annotated object out of its image, grouped by label.

    Crops are saved as ``<voc_root_path>/cutVoc/<label>/<name>_<index>.jpg``
    where ``<name>`` is the annotation's front name and ``<index>`` counts the
    objects of that image starting at 10000.

    Args:
        voc_root_path: Root directory of the VOC data set.
        rate: Fraction of the box width/height added on each side before
            cropping (clamped to the image).  None means no broadening.

    Returns:
        None.
    """
    rate = 0 if rate is None else float(rate)
    jpeg_path = os.path.join(voc_root_path, 'JPEGImages')
    anno_path = os.path.join(voc_root_path, 'Annotations')
    pairs, others_in_jpeg, others_in_anno = pb.scan_pair(
        jpeg_path, anno_path, '.jpg.jpeg.JPG.JPEG', '.xml', True, True)
    save_root_path = os.path.join(voc_root_path, 'cutVoc')
    #TODO: to speed up
    label_dirs = {}  # label -> folder already created for it
    for img_path, xml_path in tqdm.tqdm(pairs, ncols=55):
        # read
        xml = pb.voc.xml_read(xml_path)
        if len(xml.objs) == 0:
            continue
        img = cv2.imread(img_path)
        img_front_name = pb.splitext(os.path.basename(xml_path))[0]
        # cut and save
        for crop_index, obj in enumerate(xml.objs, 10000):
            if obj.name not in label_dirs:
                label_dir = os.path.join(save_root_path, obj.name)
                pb.makedirs(label_dir)
                label_dirs[obj.name] = label_dir
            label_dir = label_dirs[obj.name]
            # broaden bndbox, keeping it inside the image
            obj_w = obj.xmax + 1 - obj.xmin
            obj_h = obj.ymax + 1 - obj.ymin
            pad_x = obj_w * rate
            pad_y = obj_h * rate
            obj.xmin = max(0, int(obj.xmin - pad_x))
            obj.ymin = max(0, int(obj.ymin - pad_y))
            obj.xmax = min(img.shape[1] - 1, int(obj.xmax + pad_x))
            obj.ymax = min(img.shape[0] - 1, int(obj.ymax + pad_y))
            crop = img[obj.ymin:obj.ymax + 1, obj.xmin:obj.xmax + 1]
            crop_path = os.path.join(
                label_dir, '{0}_{1}.jpg'.format(img_front_name, crop_index))
            if not cv2.imwrite(crop_path, crop):
                print('Fail to save: \"{0}\", shape={1}'.format(
                    crop_path, crop.shape))
    return
Exemple #6
0
def count_voc(voc_root_path):
    """Write label statistics of a VOC data set to ``countVoc.txt``.

    The report, saved in ``voc_root_path``, contains the number of
    image/annotation pairs, the total number of bounding boxes and the number
    of boxes per label, with labels in sorted order.

    Args:
        voc_root_path: Root directory of the VOC data set.

    Returns:
        None.
    """
    separator = '******************************************************\n'
    row = '{0:<40} = {1:>6}\n'
    jpeg_path = os.path.join(voc_root_path, 'JPEGImages')
    anno_path = os.path.join(voc_root_path, 'Annotations')
    pairs, others_in_jpeg, others_in_anno = pb.scan_pair(
        jpeg_path, anno_path, '.jpg.jpeg', '.xml', True, True)
    report_path = os.path.join(voc_root_path, 'countVoc.txt')
    with open(report_path, 'w') as fp:
        fp.write(separator)
        fp.write(row.format('effective data', len(pairs)))
        # tally boxes per label and overall
        total_boxes = 0
        per_label = {}
        for _, xml_path in pairs:
            for obj in pb.voc.xml_read(xml_path).objs:
                per_label[obj.name] = per_label.get(obj.name, 0) + 1
                total_boxes += 1
        fp.write(row.format('effective bndbox', total_boxes))
        fp.write(separator)
        for label in sorted(per_label):
            fp.write(row.format(label, per_label[label]))
    return
Exemple #7
0
def move_voc(source_dir, begin, args_format, args_quiet):
    """Rename (and optionally relocate) every image/annotation pair.

    ``args_format`` is split into a destination directory and a name format.
    New names come from ``make_move_list(name_format, begin, count)``.  Files
    are first moved to random temporary names inside the destination folders
    and only then to their final names, so that renaming inside the source
    data set cannot overwrite a file that has not been processed yet.

    Args:
        source_dir: Root directory of the source VOC data set.
        begin: Start value forwarded to ``make_move_list``.
        args_format: ``<save_dir>/<name_format>``; an empty directory part
            means the source data set itself is renamed.
        args_quiet: When truthy and the source would be overwritten, the user
            is asked for confirmation first.
            NOTE(review): asking only when "quiet" is truthy reads inverted
            relative to the name -- confirm against the caller.

    Returns:
        None.  Exits the process via ``sys.exit`` when the user declines the
        overwrite confirmation.
    """
    # The original read the undefined global ``args.format`` here.
    save_dir, name_format = os.path.split(args_format)
    if save_dir == '' and args_quiet:
        print('You are going to cover the source voc. Continue? [Y/N]?')
        str_in = input()
        if str_in not in ('Y', 'y'):
            sys.exit('user quit')
    save_dir = source_dir if save_dir == '' else save_dir
    save_jpg_dir = os.path.join(save_dir, 'JPEGImages')
    save_xml_dir = os.path.join(save_dir, 'Annotations')
    pb.makedirs(save_jpg_dir)
    pb.makedirs(save_xml_dir)
    jpg_dir = os.path.join(source_dir, 'JPEGImages')
    xml_dir = os.path.join(source_dir, 'Annotations')
    # scan
    relative_pairs, o1, o2 = pb.scan_pair(jpg_dir, xml_dir,
                                          '.jpg.jpeg.JPG.JPEG', '.xml', False,
                                          True)
    if len(o1) != 0 or len(o2) != 0:
        print('There are other files not voc type...')
        for x in o1:
            print(x)
        for x in o2:
            print(x)
    name_list = make_move_list(name_format, begin, len(relative_pairs))
    if len(name_list) == 0:
        return

    def _move_to_tmp(read_path, target_dir, ext):
        """Move read_path to an unused random name in target_dir."""
        while True:
            tmp_path = os.path.join(
                target_dir, '{0}{1}'.format(str(random.random())[2:], ext))
            if not os.path.isfile(tmp_path):
                break
        shutil.move(read_path, tmp_path)
        return tmp_path

    # move to tmp file
    tmp_records = []  # (tmp jpg path, tmp xml path, jpg ext, xml ext)
    for jpg_full_name, xml_full_name in relative_pairs:
        jpg_ext = pb.splitext(jpg_full_name)[1]
        jpg_tmp_path = _move_to_tmp(os.path.join(jpg_dir, jpg_full_name),
                                    save_jpg_dir, jpg_ext)
        xml_ext = pb.splitext(xml_full_name)[1]
        xml_tmp_path = _move_to_tmp(os.path.join(xml_dir, xml_full_name),
                                    save_xml_dir, xml_ext)
        tmp_records.append((jpg_tmp_path, xml_tmp_path, jpg_ext, xml_ext))

    # move to dir
    # The original cleared the temp lists here and left name_list out of the
    # zip, so no file ever received its final name.
    # NOTE(review): assumes make_move_list returns one name per pair; zip
    # would silently stop early otherwise -- confirm.
    for (jpg_tmp_path, xml_tmp_path, jpg_ext,
         xml_ext), name in zip(tmp_records, name_list):
        jpg_save_path = os.path.join(save_jpg_dir,
                                     '{0}{1}'.format(name, jpg_ext))
        shutil.move(jpg_tmp_path, jpg_save_path)

        xml_save_path = os.path.join(save_xml_dir,
                                     '{0}{1}'.format(name, xml_ext))
        # Rewrite the annotation so its filename field follows the rename.
        xml = pb.voc.xml_read(xml_tmp_path)
        xml.filename = name
        pb.voc.xml_write(xml_save_path, xml)
        os.remove(xml_tmp_path)

    # Best-effort cleanup: remove the source folders only if they are now
    # empty; os.rmdir raises OSError otherwise, which is expected here.
    for folder in (jpg_dir, xml_dir, source_dir):
        try:
            os.rmdir(folder)
        except OSError:
            pass
    return
Exemple #8
0
def check_bndbox(voc_root_path, to_check_size):
    """Clean up a VOC data set and adjust its bounding boxes.

    Steps:
      1. Unpaired files and sub-folders of ``JPEGImages``/``Annotations`` are
         moved to ``<voc_root_path>/others``.
      2. Optionally, the size recorded in each annotation is compared with
         the real image; mismatching pairs are reported and left untouched.
      3. ``pb.voc.adjust_bndbox`` is applied to each remaining annotation.
         Pairs left without objects are moved to ``others``; changed
         annotations are written back.

    Args:
        voc_root_path: Root directory of the VOC data set.
        to_check_size: When truthy, verify image size against the annotation.

    Returns:
        True when no size mismatch was found, False otherwise.  On mismatch
        the full list is written to ``<voc_root_path>/size_error_list.txt``.
    """
    #read voc data list
    jpeg_path = os.path.join(voc_root_path, 'JPEGImages')
    anno_path = os.path.join(voc_root_path, 'Annotations')
    pairs, others_in_jpeg, others_in_anno = pb.scan_pair(
        jpeg_path, anno_path, '.jpg.jpeg.JPG.JPEG', '.xml', True, True)

    #make move path and move other files
    move_dir = os.path.join(voc_root_path, 'others')
    move_jpeg_dir = os.path.join(move_dir, 'JPEGImages')
    move_anno_dir = os.path.join(move_dir, 'Annotations')
    pb.makedirs(move_jpeg_dir)
    pb.makedirs(move_anno_dir)
    for x in others_in_jpeg:
        shutil.move(x, move_jpeg_dir)
    for x in others_in_anno:
        shutil.move(x, move_anno_dir)
    for f in pb.scan_folder(jpeg_path):
        shutil.move(os.path.join(jpeg_path, f), move_jpeg_dir)
    for f in pb.scan_folder(anno_path):
        shutil.move(os.path.join(anno_path, f), move_anno_dir)

    #adjust bndbox
    bad_img_size_list = []
    for img_path, xml_path in tqdm.tqdm(pairs, ncols=55):
        #read xml
        xml = pb.voc.xml_read(xml_path)
        #check whether size matched
        if to_check_size:
            img = cv2.imread(img_path)
            # cv2.imread returns None for unreadable files; report those in
            # the same list (shape shown as None) instead of crashing.
            img_shape = None if img is None else img.shape
            size_ok = (img_shape is not None
                       and img_shape[0] == xml.height
                       and img_shape[1] == xml.width
                       and not (len(img_shape) == 3
                                and img_shape[2] != xml.depth))
            if not size_ok:
                bad_img_size_list.append((img_shape, xml, xml_path))
                continue
        #check voc
        no_change = pb.voc.adjust_bndbox(xml) == 0
        #move no obj or save changed
        if len(xml.objs) == 0:  #no obj
            shutil.move(img_path, move_jpeg_dir)
            shutil.move(xml_path, move_anno_dir)
        elif not no_change:  #is changed
            pb.voc.xml_write(xml_path, xml)

    #rmdir if is empty (os.rmdir raises OSError for non-empty folders)
    for folder in (move_jpeg_dir, move_anno_dir, move_dir):
        try:
            os.rmdir(folder)
        except OSError:
            pass

    if bad_img_size_list:
        logs = [
            '{0}  img={1} vs xml=({2}, {3}, {4}).'.format(
                os.path.basename(xml_path), img_shape, xml.height, xml.width,
                xml.depth)
            for img_shape, xml, xml_path in bad_img_size_list
        ]
        #print bad size images (only the first few on screen)
        print('There are {0} images\' size not matched, such as:'.format(
            len(bad_img_size_list)))
        for one_log in logs[:3]:
            print(one_log)
        if len(bad_img_size_list) > 3:
            print('See others in \"size_error_list.txt\"')
        print('deal with the size errors and check again.')

        # save error list: every entry, since the message above points the
        # user to this file for the ones not printed
        with open(os.path.join(voc_root_path, 'size_error_list.txt'),
                  'w') as fp:
            for one_log in logs:
                fp.write(one_log + '\n')
    return len(bad_img_size_list) == 0