Exemple #1
0
def check(data_dir):
    """Print how many jpg paths under ``data_dir`` have no matching xml path.

    Both listings come from ``get_files``. Each path has its ``.jpg`` /
    ``.xml`` text removed, and the image stems that are absent from the xml
    stems are counted.
    """
    image_paths = get_files(data_dir, 'jpg')
    annotation_paths = get_files(data_dir, 'xml')

    annotation_stems = {path.replace(".xml", "") for path in annotation_paths}
    image_stems = {path.replace(".jpg", "") for path in image_paths}

    # Stems present for images but missing among the annotations.
    unmatched = image_stems - annotation_stems
    print(len(unmatched))
Exemple #2
0
def find_invalid_jpgs(data_path):
    """Print the path of every jpg under ``data_path`` rejected by ``is_valid_jpg``.

    A start banner and the total number of jpg files found are printed first.
    """
    print("start to check!")
    jpg_paths = get_files(data_path, 'jpg')
    print("there are %d files !" % len(jpg_paths))
    for jpg_path in jpg_paths:
        if is_valid_jpg(jpg_path):
            continue
        print(jpg_path)
Exemple #3
0
def rename_file(data_dir):
    """Rewrite the ``filename`` element of every xml found under ``data_dir``.

    The element text is set to the xml file's own base name with ``.xml``
    replaced by ``.jpg``, and the xml file is then written back in place.
    """
    xml_paths = get_files(data_dir, 'xml')
    print("there are %d xmls" % len(xml_paths))

    for xml_path in xml_paths:
        annotation = ET.ElementTree(file=xml_path)
        # Only the last path component is used to build the image name.
        _, base_name = os.path.split(xml_path)
        image_name = base_name.replace(".xml", '.jpg')
        print(image_name)
        annotation.find('./filename').text = image_name

        annotation.write(xml_path)
Exemple #4
0
def get_shape(img_path):
    """Write each image's actual width and height into its annotation xml.

    For every jpg returned by ``get_files(img_path, 'jpg')`` the image is
    loaded with ``cv2.imread``, and the ``size/width`` and ``size/height``
    elements of the xml file that shares the image's path (extension swapped
    to ``.xml``) are overwritten with the measured values. The xml is saved
    in place.

    Images that ``cv2.imread`` cannot decode are reported and skipped instead
    of aborting the whole run.
    """
    # Function-scope import so this block does not rely on a module-level one.
    import os

    imgs = get_files(img_path, 'jpg')
    print("there are %d imgages in total !"%len(imgs))
    for img in imgs:
        image = cv2.imread(img)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            print("cannot read image, skipped: %s" % img)
            continue
        height, width = image.shape[:2]
        print(img)
        # Swap only the extension; str.replace('jpg', 'xml') would also
        # rewrite any 'jpg' occurring in directory names or the file stem.
        xml_path = os.path.splitext(img)[0] + '.xml'
        tree = ET.ElementTree(file=xml_path)
        tree.find('./size/width').text = str(int(width))
        tree.find('./size/height').text = str(int(height))
        tree.write(xml_path)
Exemple #5
0
def find_invalid_xmls(data_dir):
    """Report every xml under ``data_dir`` for which ``check_bboxes`` finds bad boxes.

    For each such file the path, the info value returned by ``check_bboxes``
    and one ``format_bbox`` line per invalid box are printed.
    """
    xml_paths = get_files(data_dir, suffix='xml')
    print("there are %d files need to check" % len(xml_paths))
    for xml_path in xml_paths:
        bad_boxes, details = check_bboxes(xml_path)
        # Nothing to report for files without invalid boxes.
        if len(bad_boxes) == 0:
            continue
        print(xml_path)
        print(details)
        for box in bad_boxes:
            print(format_bbox(box))
Exemple #6
0
def get_stats(path):
    """Print how often each object label occurs in the xml files under ``path``.

    Every file returned by ``get_files(path)`` is parsed, the text of each
    ``object/name`` element is tallied, and one ``label <tab> count`` line is
    printed per distinct label, in first-seen order.

    NOTE(review): ``get_files`` is called without a suffix here, unlike the
    other call sites; presumably its default selects xml files - confirm.
    """
    xmls = get_files(path)
    labels = {}
    for xml in xmls:
        tree = ET.ElementTree(file=xml)
        # Use a relative path: ElementTree.findall() with a leading "/" emits
        # a FutureWarning and its behaviour is documented as subject to change.
        for node in tree.findall("./object/name"):
            labels[node.text] = labels.get(node.text, 0) + 1
    for key, value in labels.items():
        print(key, "\t", value)
Exemple #7
0
def get_stats_of_bboxes(xml_dir, save_dir):
    """Collect bounding boxes per label and pickle them into ``save_dir``.

    Every xml returned by ``get_files`` is parsed; for each ``object``
    element the box is stored as ``[xmin, xmax, ymin, ymax]`` (ints) under
    the object's label text. The resulting ``defaultdict(list)`` is dumped
    to ``<save_dir>/statistics.pickle``.

    NOTE(review): the suffix is passed as ".xml" here while other call sites
    pass 'xml' - confirm which form ``get_files`` expects.
    """
    xmls = get_files(xml_dir, ".xml")
    statistics = defaultdict(list)
    for xml in xmls:
        tree = ET.ElementTree(file=xml)
        for obj in tree.findall("./object"):
            # Key by the label text; keying by the Element object itself
            # would give every box its own entry instead of grouping by label.
            name = obj.find("./name").text
            coords = [
                int(obj.find("./bndbox/%s" % tag).text)
                for tag in ("xmin", "xmax", "ymin", "ymax")
            ]
            statistics[name].append(coords)
    # pickle.dump needs a binary file opened for writing ('wb', not 'rb').
    with open(os.path.join(save_dir, 'statistics.pickle'), 'wb') as f:
        pickle.dump(statistics, f)
Exemple #8
0
def get_mean_of_rgb(path, njob=1):
    """Return the per-image mean of the r, g, b totals of the jpgs under ``path``.

    The image list is split into ``njob`` contiguous chunks (the last chunk
    takes the remainder), each chunk is handed to ``_sum_rgb`` in a worker
    process, and the partial totals are added up and divided by the number
    of images.

    Args:
        path: location passed to ``get_files(path, 'jpg')``.
        njob: number of worker processes / chunks.

    Returns:
        Tuple ``(r, g, b)`` of the summed values divided by the image count.

    Raises:
        ValueError: if no jpg files are found (the mean would be undefined).

    NOTE(review): assumes ``_sum_rgb`` returns an indexable ``(r, g, b)``
    triple of totals for its chunk - confirm against its definition.
    """
    imgs = get_files(path, 'jpg')
    nums_img = len(imgs)
    if nums_img == 0:
        raise ValueError("no jpg files found under %r" % (path,))
    block = nums_img // njob
    pool = Pool(processes=njob)
    try:
        res = []
        for k in range(njob):
            start = k * block
            # The last worker also takes whatever the integer division left over.
            stop = (k + 1) * block if k != njob - 1 else None
            res.append(pool.apply_async(_sum_rgb, (imgs[start:stop], )))
        # apply_async returns AsyncResult handles; .get() waits for the value.
        partial_sums = [task.get() for task in res]
    finally:
        # Always release the worker processes, even if a task failed.
        pool.close()
        pool.join()
    r = 0
    g = 0
    b = 0
    for chunk_sum in partial_sums:
        r += chunk_sum[0]
        g += chunk_sum[1]
        b += chunk_sum[2]
    return r / nums_img, g / nums_img, b / nums_img
Exemple #9
0
def find_invalid_jpgs(data_path):
    """Scan ``data_path`` for jpg files and print those rejected by ``is_valid_jpg``."""
    candidates = get_files(data_path, 'jpg')

    # Lazy filter: each file is validated right before it would be printed.
    for broken in (path for path in candidates if not is_valid_jpg(path)):
        print(broken)