def process_annotated_json(class_names, filename, output_dir, label_dict):
    """Convert one labelme JSON annotation into a JPEG image plus a VOC XML file.

    Also renders a visualization JPEG with the annotations drawn on the image.
    """
    stem = get_base_name(filename)

    # Destination paths: source image, VOC annotation, and preview image.
    image_path = osp.join(output_dir, "JPEGImages", stem + ".jpg")
    xml_path = osp.join(output_dir, "Annotations", stem + ".xml")
    viz_path = osp.join(output_dir, "AnnotationsVisualization", stem + ".jpg")

    annotation = labelme.LabelFile(filename=filename)

    # Decode the image embedded in the labelme JSON and persist it.
    image = labelme.utils.img_data_to_arr(annotation.imageData)
    imgviz.io.imsave(image_path, image)

    # Build the VOC XML tree together with the boxes/labels it describes.
    xml, bboxes, labels = get_xml_with_labelfile(
        annotation, stem, label_dict, class_names)

    # Render the annotated preview.
    save_visualization_image(
        image, labels, bboxes, class_names, output_file=viz_path)

    # Serialize the VOC annotation.
    with open(xml_path, "wb") as handle:
        handle.write(lxml.etree.tostring(xml, pretty_print=True))
def generateCocoJson(self, label_files):
    """Append COCO image records (and their annotations) to ``self.data``.

    label_files: iterable of labelme JSON file paths.
    """
    ann_path = osp.join(self.output_dir, "annotations.json")

    for image_id, json_path in tqdm(enumerate(label_files)):
        # print("Generating dataset from:", json_path)
        record = labelme.LabelFile(filename=json_path)

        stem = osp.splitext(osp.basename(json_path))[0]
        jpeg_path = osp.join(self.output_dir, "JPEGImages", stem + ".jpg")

        # Decode the embedded image and write it out as an RGB JPEG.
        image = labelme.utils.img_data_to_arr(record.imageData)
        Image.fromarray(image).convert("RGB").save(jpeg_path)

        self.data["images"].append(
            dict(
                license=0,
                url=None,
                # Stored relative to the annotation file's directory.
                file_name=osp.relpath(jpeg_path, osp.dirname(ann_path)),
                height=image.shape[0],
                width=image.shape[1],
                date_captured=None,
                id=image_id,
            ))

        # Delegate per-shape annotation extraction for this image.
        self._getAnno(record, image, image_id)

        # Debug mode: stop after the first file.
        if self.debug:
            break
def output(self, ordered_keys=None):
    """Export every labelme JSON in ``self.input_dir`` to a VOC-style
    semantic-segmentation dataset under ``self.output_dir``.

    Arguments:
        ordered_keys(list(str)): optional label names; when given, shapes are
            filtered to these labels and re-ordered to match this sequence
            before rasterizing (later shapes paint over earlier ones).
    """
    for filename in tqdm(glob.glob(osp.join(self.input_dir, "*.json"))):
        # print("Generating dataset from:", filename)
        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(self.output_dir, "JPEGImages", base + ".jpg")
        # NOTE(review): out_lbl_file is currently unused — the np.save call
        # below is commented out.
        out_lbl_file = osp.join(self.output_dir, "SegmentationClass",
                                base + ".npy")
        out_png_file = osp.join(self.output_dir, "SegmentationClassPNG",
                                base + ".png")
        if not self.noviz:
            out_viz_file = osp.join(
                self.output_dir,
                "SegmentationClassVisualization",
                base + ".jpg",
            )

        # Write the original (still encoded) image bytes straight to disk,
        # then decode a copy for shape rasterization.
        with open(out_img_file, "wb") as f:
            f.write(label_file.imageData)
        img = labelme.utils.img_data_to_arr(label_file.imageData)

        if ordered_keys is not None:
            # Keep only shapes whose label appears in ordered_keys,
            # in ordered_keys order (controls paint-over priority).
            newshapes = []
            for ok in ordered_keys:
                for shape in label_file.shapes:
                    if shape["label"] == ok:
                        newshapes.append(shape)
            label_file.shapes = newshapes

        # Debug mode: dump the shapes of the first file and stop.
        if self.debug:
            print(label_file.shapes)
            break

        # Rasterize polygons into a per-pixel class-id label map.
        lbl, _ = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=self.class_name_to_id,
        )
        labelme.utils.lblsave(out_png_file, lbl)
        # np.save(out_lbl_file, lbl)

        if not self.noviz:
            # NOTE(review): this tests img.shape[0] (the image HEIGHT) == 1
            # to detect a grayscale image; presumably img.ndim or a channel
            # check was intended — confirm against the actual inputs.
            if img.shape[0] == 1:  # gray img
                img = imgviz.rgb2gray(img)
            viz = imgviz.label2rgb(
                label=lbl,
                # img=imgviz.rgb2gray(img),
                img=img,
                font_size=15,
                label_names=self.class_names,
                loc="rb",
            )
            imgviz.io.imsave(out_viz_file, viz)
def main():
    """CLI: convert a directory of labelme JSONs into per-image PNG class
    masks (no image export), using label line numbers as class ids."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input_dir', help='input annotated directory')
    parser.add_argument('--output_dir', help='output dataset directory')
    parser.add_argument('--labels', help='labels file', required=True)
    args = parser.parse_args()

    # Refuse to clobber an existing dataset directory.
    if osp.exists(args.output_dir):
        print('Output directory already exists:', args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    print('Creating masks:', args.output_dir)

    # One class per line of the labels file; the line index is the class id
    # (note: unlike the other converters in this file, there is no
    # __ignore__/-1 offset here).
    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        class_names.append(class_name)
    class_names = tuple(class_names)
    print('class_names:', class_names)

    # Persist the class list alongside the masks.
    out_class_names_file = osp.join(args.output_dir, 'class_names.txt')
    with open(out_class_names_file, 'w') as f:
        f.writelines('\n'.join(class_names))
    print('Saved class_names:', out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, '*.json')):
        print('Generating dataset from:', filename)
        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_png_file = osp.join(args.output_dir, base + '.png')

        #img = labelme.utils.img_data_to_arr(label_file.imageData)
        # NOTE(review): label_file.imgsize is not a stock labelme.LabelFile
        # attribute — presumably a project-local extension holding
        # (width, height); confirm. Reversing yields (height, width).
        size = [s for s in reversed(label_file.imgsize)]  #Takes (height,width)
        lbl, _ = labelme.utils.shapes_to_label(
            img_shape=size,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        # NOTE(review): bare lblsave (not labelme.utils.lblsave) — assumes a
        # module-level import elsewhere in the file; verify.
        lblsave(out_png_file, lbl)
def main():
    """CLI: convert labelme JSONs into a COCO dataset with instance
    segmentation plus up to 10 'c'-labelled keypoints per instance.

    Relies on module-level ``args`` and a ``pointInRect`` helper defined
    elsewhere in this file.
    """
    os.makedirs(args.output_dir, exist_ok=True)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"), exist_ok=True)
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir, "Visualization"), exist_ok=True)
    print("Creating dataset:", args.output_dir)

    # Skeleton of the COCO annotation file.
    now = datetime.datetime.now()
    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(
            url=None,
            id=0,
            name=None,
        )],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    # Labels file: first line is the ignore class (id -1) and is skipped.
    # Every remaining class gets 10 keypoints c1..c10 chained into a line.
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        print(line, i)
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            continue
        class_name_to_id[class_name] = class_id
        keypointsNm = ['c' + str(no) for no in range(1, 11, 1)]
        # Keypoint skeleton: a simple chain c1-c2-...-c10 (1-based indices).
        skeleton = [[1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [7, 8],
                    [8, 9], [9, 10]]
        data["categories"].append(
            dict(
                supercategory=None,
                id=class_id,
                keypoints=keypointsNm,
                name=class_name,
                skeleton=skeleton,
            ))

    out_ann_file = osp.join(args.output_dir, "annotations.json")
    print('op', out_ann_file)
    label_files = glob.glob(osp.join(args.input_dir, "*.json"))
    for image_id, filename in enumerate(label_files):
        print("Generating dataset from:", filename)
        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        data["images"].append(
            dict(
                license=0,
                url=None,
                file_name=osp.basename(
                    osp.relpath(out_img_file, osp.dirname(out_ann_file))),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            ))

        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        # Point shapes labelled 'c' / 'r' are collected as candidate
        # keypoints rather than polygons.
        allckeypoints = []
        allrkeypoints = []
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(img.shape[:2], points,
                                               shape_type)

            # Shapes sharing (label, group_id) are one instance; a missing
            # group_id gets a unique one so shapes stay separate.
            if group_id is None:
                group_id = uuid.uuid1()
            instance = (label, group_id)
            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == "rectangle":
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                # Rectangle as a 4-corner polygon.
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            elif shape_type == "point":
                (x1, y1) = points[0]
                if label == 'c':
                    allckeypoints.append([x1, y1])
                if label == 'r':
                    allrkeypoints.append([x1, y1])
            else:
                points = np.asarray(points).flatten().tolist()
            segmentations[instance].append(points)
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue
            cls_id = class_name_to_id[cls_name]

            # RLE-encode the instance mask for area/bbox computation.
            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            # Keep only the 'c' keypoints that fall inside this instance's
            # bbox, visibility flag 2 (= labeled and visible).
            ckeypoints = []
            for kp in allckeypoints:
                if pointInRect(kp, bbox):
                    ckeypoints.append([kp[0], kp[1], 2])
            # rkeypoints=[]
            # for kp in allrkeypoints:
            # if pointInRect(kp,bbox):
            # rkeypoints.append([kp[0],kp[1],2])
            # Keypoints sorted left-to-right by x.
            ckeypoints = np.array(
                sorted(ckeypoints, key=lambda x: x[0], reverse=False))
            # rkeypoints=np.array(sorted(rkeypoints, key=lambda y: y[1], reverse=False))
            c_num_keypoints = int(ckeypoints.shape[0])
            # r_num_keypoints=int(rkeypoints.shape[0])

            # Pad (or zero-fill) to the fixed 10x3 COCO keypoint layout.
            if c_num_keypoints > 0:
                keypoints_empty = np.zeros((10, 3))
                keypoints_empty[0:ckeypoints.shape[0],
                                0:ckeypoints.shape[1]] = ckeypoints
                ckeypoints = keypoints_empty.ravel().tolist()
            else:
                keypoints_empty = np.zeros((10, 3))
                ckeypoints = keypoints_empty.ravel().tolist()
            # if r_num_keypoints>0:
            # keypoints_empty=b = np.zeros((25,3))
            # keypoints_empty[0:rkeypoints.shape[0],0:rkeypoints.shape[1]]=rkeypoints
            # rkeypoints=keypoints_empty.ravel().tolist()
            # else:
            # rkeypoints=[]
            # keypoints = np.hstack((ckeypoints,rkeypoints)).tolist()
            keypoints = ckeypoints

            kdict = dict(
                id=len(data["annotations"]),
                image_id=image_id,
                category_id=cls_id,
                segmentation=segmentations[instance],
                area=area,
                bbox=bbox,
                iscrowd=0,
                #r_num_keypoints=r_num_keypoints
            )
            # keypoints is always a 30-element list here, so this branch
            # always attaches the keypoint fields.
            if len(keypoints) > 0:
                kdict['keypoints'] = keypoints
                kdict['c_num_keypoints'] = c_num_keypoints
            data["annotations"].append(kdict)

        if not args.noviz:
            # NOTE(review): unlike the sibling converter below, this zip(*...)
            # raises if no mask survives the class filter — assumes every
            # image has at least one known-class shape; confirm.
            labels, captions, masks = zip(*[(class_name_to_id[cnm], cnm, msk)
                                            for (cnm, gid), msk in masks.items()
                                            if cnm in class_name_to_id])
            viz = imgviz.instances2rgb(
                image=img,
                labels=labels,
                masks=masks,
                captions=captions,
                font_size=15,
                line_width=2,
            )
            out_viz_file = osp.join(args.output_dir, "Visualization",
                                    base + ".jpg")
            imgviz.io.imsave(out_viz_file, viz)

    with open(out_ann_file, "w") as f:
        json.dump(data, f)
    print(out_ann_file)
def main():
    """CLI: convert labelme JSONs (rectangle shapes only) into a Pascal VOC
    detection dataset: JPEGImages/, Annotations/*.xml, and optional
    AnnotationsVisualization/ previews."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("input_dir", help="input annotated directory")
    parser.add_argument("output_dir", help="output dataset directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument("--noviz", help="no visualization", action="store_true")
    args = parser.parse_args()

    # Refuse to clobber an existing dataset directory.
    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    os.makedirs(osp.join(args.output_dir, "Annotations"))
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir, "AnnotationsVisualization"))
    print("Creating dataset:", args.output_dir)

    # Labels file convention: line 1 is __ignore__ (id -1, skipped),
    # line 2 is _background_ (id 0); remaining lines are real classes.
    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        elif class_id == 0:
            assert class_name == "_background_"
        class_names.append(class_name)
    class_names = tuple(class_names)
    print("class_names:", class_names)

    out_class_names_file = osp.join(args.output_dir, "class_names.txt")
    with open(out_class_names_file, "w") as f:
        f.writelines("\n".join(class_names))
    print("Saved class_names:", out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, "*.json")):
        print("Generating dataset from:", filename)
        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")
        out_xml_file = osp.join(args.output_dir, "Annotations", base + ".xml")
        if not args.noviz:
            out_viz_file = osp.join(args.output_dir,
                                    "AnnotationsVisualization", base + ".jpg")

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)

        # Build the VOC annotation tree; empty elements are placeholders.
        maker = lxml.builder.ElementMaker()
        xml = maker.annotation(
            maker.folder(),
            maker.filename(base + ".jpg"),
            maker.database(),  # e.g., The VOC2007 Database
            maker.annotation(),  # e.g., Pascal VOC2007
            maker.image(),  # e.g., flickr
            maker.size(
                maker.height(str(img.shape[0])),
                maker.width(str(img.shape[1])),
                maker.depth(str(img.shape[2])),
            ),
            maker.segmented(),
        )

        bboxes = []
        labels = []
        for shape in label_file.shapes:
            # Only rectangles become VOC objects; everything else is skipped.
            if shape["shape_type"] != "rectangle":
                print("Skipping shape: label={label}, "
                      "shape_type={shape_type}".format(**shape))
                continue

            class_name = shape["label"]
            # NOTE(review): raises ValueError if a shape's label is not in
            # the labels file — assumes annotations and labels file agree.
            class_id = class_names.index(class_name)

            (xmin, ymin), (xmax, ymax) = shape["points"]
            # swap if min is larger than max.
            xmin, xmax = sorted([xmin, xmax])
            ymin, ymax = sorted([ymin, ymax])

            bboxes.append([ymin, xmin, ymax, xmax])
            labels.append(class_id)

            xml.append(
                maker.object(
                    maker.name(shape["label"]),
                    maker.pose(),
                    maker.truncated(),
                    maker.difficult(),
                    maker.bndbox(
                        maker.xmin(str(xmin)),
                        maker.ymin(str(ymin)),
                        maker.xmax(str(xmax)),
                        maker.ymax(str(ymax)),
                    ),
                ))

        if not args.noviz:
            captions = [class_names[label] for label in labels]
            viz = imgviz.instances2rgb(
                image=img,
                labels=labels,
                bboxes=bboxes,
                captions=captions,
                font_size=15,
            )
            imgviz.io.imsave(out_viz_file, viz)

        with open(out_xml_file, "wb") as f:
            f.write(lxml.etree.tostring(xml, pretty_print=True))
def main():
    """CLI: convert labelme JSONs into a Pascal VOC semantic-segmentation
    dataset: JPEGImages/, SegmentationClass/*.npy,
    SegmentationClassPNG/*.png, and optional visualizations."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--input_dir", default="data_annotated",
                        help="input annotated directory")
    parser.add_argument("--output_dir", default="data_dataset_voc",
                        help="output dataset directory")
    parser.add_argument("--labels", default="labels.txt", help="labels file")
    parser.add_argument("--noviz", help="no visualization", action="store_true")
    args = parser.parse_args()

    # Refuse to clobber an existing dataset directory.
    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClass"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClassPNG"))
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir,
                             "SegmentationClassVisualization"))
    print("Creating dataset:", args.output_dir)

    # Labels file convention: line 1 is __ignore__ (id -1, skipped),
    # line 2 is _background_ (id 0); remaining lines are real classes.
    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        elif class_id == 0:
            assert class_name == "_background_"
        class_names.append(class_name)
    class_names = tuple(class_names)
    print("class_names:", class_names)

    out_class_names_file = osp.join(args.output_dir, "class_names.txt")
    with open(out_class_names_file, "w") as f:
        f.writelines("\n".join(class_names))
    print("Saved class_names:", out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, "*.json")):
        print("Generating dataset from:", filename)
        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")
        out_lbl_file = osp.join(args.output_dir, "SegmentationClass",
                                base + ".npy")
        out_png_file = osp.join(args.output_dir, "SegmentationClassPNG",
                                base + ".png")
        if not args.noviz:
            out_viz_file = osp.join(
                args.output_dir,
                "SegmentationClassVisualization",
                base + ".jpg",
            )

        # Write the original (still encoded) image bytes straight to disk,
        # then decode a copy for shape rasterization.
        with open(out_img_file, "wb") as f:
            f.write(label_file.imageData)
        img = labelme.utils.img_data_to_arr(label_file.imageData)

        # Rasterize polygons into a per-pixel class-id label map.
        lbl, _ = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        labelme.utils.lblsave(out_png_file, lbl)
        np.save(out_lbl_file, lbl)

        if not args.noviz:
            # Overlay the label map on a grayscale copy of the image.
            viz = imgviz.label2rgb(
                label=lbl,
                img=imgviz.rgb2gray(img),
                font_size=15,
                label_names=class_names,
                loc="rb",
            )
            imgviz.io.imsave(out_viz_file, viz)
def main():
    """CLI: convert labelme JSONs into a COCO instance-segmentation dataset:
    JPEGImages/, annotations.json, and optional Visualization/ previews."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("input_dir", help="input annotated directory")
    parser.add_argument("output_dir", help="output dataset directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument("--noviz", help="no visualization", action="store_true")
    args = parser.parse_args()

    # Refuse to clobber an existing dataset directory.
    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir, "Visualization"))
    print("Creating dataset:", args.output_dir)

    # Skeleton of the COCO annotation file.
    now = datetime.datetime.now()
    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(
            url=None,
            id=0,
            name=None,
        )],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    # Labels file: first line is __ignore__ (id -1) and is skipped.
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        class_name_to_id[class_name] = class_id
        data["categories"].append(
            dict(
                supercategory=None,
                id=class_id,
                name=class_name,
            ))

    out_ann_file = osp.join(args.output_dir, "annotations.json")
    label_files = glob.glob(osp.join(args.input_dir, "*.json"))
    for image_id, filename in enumerate(label_files):
        print("Generating dataset from:", filename)
        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        data["images"].append(
            dict(
                license=0,
                url=None,
                # Stored relative to the annotation file's directory.
                file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            ))

        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(img.shape[:2], points,
                                               shape_type)

            # Shapes sharing (label, group_id) are one instance; a missing
            # group_id gets a unique one so shapes stay separate.
            if group_id is None:
                group_id = uuid.uuid1()
            instance = (label, group_id)
            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == "rectangle":
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                # Rectangle as a 4-corner polygon.
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            else:
                points = np.asarray(points).flatten().tolist()
            segmentations[instance].append(points)
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue
            cls_id = class_name_to_id[cls_name]

            # RLE-encode the instance mask for area/bbox computation.
            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            data["annotations"].append(
                dict(
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                ))

        if not args.noviz:
            # Fall back to the raw image if no known-class mask exists.
            viz = img
            if masks:
                labels, captions, masks = zip(
                    *[(class_name_to_id[cnm], cnm, msk)
                      for (cnm, gid), msk in masks.items()
                      if cnm in class_name_to_id])
                viz = imgviz.instances2rgb(
                    image=img,
                    labels=labels,
                    masks=masks,
                    captions=captions,
                    font_size=15,
                    line_width=2,
                )
            out_viz_file = osp.join(args.output_dir, "Visualization",
                                    base + ".jpg")
            imgviz.io.imsave(out_viz_file, viz)

    with open(out_ann_file, "w") as f:
        json.dump(data, f)
def convert(input_annotated_dir,
            output_annotated_dir,
            labels_file='labels.txt',
            vis=False,
            save_mask=True,
            train_valid_split=0.7):
    """Generator: convert a labelme-annotated directory into train/valid COCO
    datasets plus a YOLACT-style data.yaml.

    Yields (progress_percentage, filename) per processed file, so a UI can
    drive/monitor the conversion.

    train_valid_split: if <= 1, treated as the per-image probability of being
    assigned to the training set; if > 1, treated as the absolute number of
    training images desired.
    """
    assert os.path.isfile(
        labels_file), "Please provide the correct label file."
    assert os.path.exists(input_annotated_dir), "Please check the input dir."

    # Per-class instance counts (all data / training split only).
    class_instance_counter = {}
    train_class_instance_counter = {}

    if not osp.exists(output_annotated_dir):
        os.makedirs(output_annotated_dir)
        os.makedirs(osp.join(output_annotated_dir, 'train', "JPEGImages"))
        os.makedirs(osp.join(output_annotated_dir, 'valid', "JPEGImages"))
    if vis:
        train_vis_dir = osp.join(output_annotated_dir, 'train',
                                 'Visualization')
        _create_dirs(train_vis_dir)
        valid_vis_dir = osp.join(output_annotated_dir, 'valid',
                                 'Visualization')
        _create_dirs(valid_vis_dir)
    if save_mask and vis:
        train_mask_dir = osp.join(output_annotated_dir, 'train', 'Masks')
        _create_dirs(train_mask_dir)
        valid_mask_dir = osp.join(output_annotated_dir, 'valid', 'Masks')
        _create_dirs(valid_mask_dir)

    print("Creating dataset:", output_annotated_dir)
    training_examples_sofar = 0
    now = datetime.datetime.now()

    # Two independent COCO skeletons, one per split.
    train_data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(
            url=None,
            id=0,
            name=None,
        )],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )
    valid_data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(
            url=None,
            id=0,
            name=None,
        )],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    # Labels file: first line is __ignore__ (id -1, skipped); background is
    # registered as a category but excluded from the instance counters.
    class_name_to_id = {}
    with open(labels_file, 'r') as lf:
        for i, line in enumerate(lf.readlines()):
            class_id = i - 1  # starts with -1
            class_name = line.strip()
            if class_id == -1:
                assert class_name == "__ignore__"
                continue
            if class_name != '_background_':
                class_instance_counter[class_name] = 0
                train_class_instance_counter[class_name] = 0
            class_name_to_id[class_name] = class_id
            train_data["categories"].append(
                dict(
                    supercategory=None,
                    id=class_id,
                    name=class_name,
                ))
            valid_data["categories"].append(
                dict(
                    supercategory=None,
                    id=class_id,
                    name=class_name,
                ))

    train_out_ann_file = osp.join(output_annotated_dir, 'train',
                                  "annotations.json")
    valid_out_ann_file = osp.join(output_annotated_dir, 'valid',
                                  "annotations.json")
    label_files = glob.glob(osp.join(input_annotated_dir, "*.json"))
    num_label_files = len(label_files)

    # Turn an absolute training count into a sampling probability.
    training_percentage = 0.7
    if train_valid_split > 1 and train_valid_split <= num_label_files:
        training_percentage = train_valid_split / num_label_files
    if train_valid_split > num_label_files:
        # if the provided number is not valid
        # e.g. with too many frames
        # each image has the 0.7 probaility
        # to be assigned for training
        train_valid_split = 0.7

    # Angles used to approximate point/circle shapes as 24-gon polygons.
    _angles = [
        0, 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210,
        225, 240, 255, 270, 285, 300, 315, 330, 345, 360
    ]

    for image_id, filename in enumerate(label_files):
        # Report progress to the caller.
        yield ((image_id + 1) / num_label_files) * 100, filename
        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        train_out_img_file = osp.join(output_annotated_dir, 'train',
                                      "JPEGImages", base + ".jpg")
        valid_out_img_file = osp.join(output_annotated_dir, 'valid',
                                      "JPEGImages", base + ".jpg")

        img = labelme.utils.img_data_to_arr(label_file.imageData)

        is_train = 0
        # for area
        masks = {}
        # for segmentation
        segmentations = collections.defaultdict(list)
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")

            # Decide the split for this image. NOTE(review): is_train is
            # re-evaluated per shape, so the last shape of the image wins.
            if train_valid_split > 1:
                if training_examples_sofar < train_valid_split:
                    train_class_instance_counter[
                        label] = train_class_instance_counter.get(label, 0)
                    if train_class_instance_counter[label] == 0:
                        is_train = 1
                    elif train_class_instance_counter[label] <= (
                            train_valid_split /
                            len(train_class_instance_counter)) + 1:
                        is_train = 1
                    # make sure very instances in the class is covered
                    # before random sampling
                    elif 0 not in train_class_instance_counter.values():
                        is_train = 1
                    else:
                        is_train = np.random.choice(
                            [0, 1],
                            p=[1 - training_percentage, training_percentage])
            elif train_valid_split < 1:
                is_train = np.random.choice(
                    [0, 1], p=[1 - train_valid_split, train_valid_split])
            elif train_valid_split == 1:
                is_train = 1

            # Count instances per class, overall and for the training split.
            try:
                class_instance_counter[label] += 1
            except KeyError:
                class_instance_counter[label] = 1
            if is_train == 1:
                try:
                    train_class_instance_counter[label] += 1
                except KeyError:
                    train_class_instance_counter[label] = 1

            # A point becomes a small polygon (radius ~10px with jitter).
            if shape_type == 'point':
                try:
                    cx, cy = points[0]
                    radius = 10.0 + np.random.choice(np.arange(0, 1, 0.1))
                    xs = cx + (radius * np.cos(np.array(_angles) * np.pi / 180))
                    ys = cy + (radius * np.sin(np.array(_angles) * np.pi / 180))
                    points = np.asarray([list(p) for p in zip(xs, ys)])
                    shape_type = "polygon"
                except IndexError:
                    continue
                    # NOTE(review): unreachable — the continue above exits the
                    # iteration before this print can run.
                    print(f"(unknown) has a invalid point {points}.")

            mask = labelme.utils.shape_to_mask(img.shape[:2], points,
                                               shape_type)

            # Shapes sharing (label, group_id) are one instance; a missing
            # group_id gets a unique one so shapes stay separate.
            if group_id is None:
                group_id = uuid.uuid1()
            instance = (label, group_id)
            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == "rectangle":
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                # Rectangle as a 4-corner polygon.
                points = np.asarray([x1, y1, x2, y1, x2, y2, x1, y2])
                segmentations[instance].append(points.flatten().tolist())
            elif shape_type == "circle":
                # (center, edge point) -> polygon approximation.
                (x1, y1), (x2, y2) = points
                radius = int(((x1 - x2)**2 + (y1 - y2)**2)**(1 / 2))
                xs = x1 + (radius * np.cos(np.array(_angles) * np.pi / 180))
                ys = y1 + (radius * np.sin(np.array(_angles) * np.pi / 180))
                points = np.asarray([list(p) for p in zip(xs, ys)])
                points = np.asarray(points).flatten().tolist()
                shape_type = "polygon"
                segmentations[instance].append(points)
            else:
                points = np.asarray(points).flatten().tolist()
                segmentations[instance].append(points)
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue
            cls_id = class_name_to_id[cls_name]

            # RLE-encode the instance mask for area/bbox computation.
            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            if is_train:
                train_data["annotations"].append(
                    dict(
                        id=len(train_data["annotations"]),
                        image_id=image_id,
                        category_id=cls_id,
                        segmentation=segmentations[instance],
                        area=area,
                        bbox=bbox,
                        iscrowd=0,
                    ))
            else:
                valid_data["annotations"].append(
                    dict(
                        id=len(valid_data["annotations"]),
                        image_id=image_id,
                        category_id=cls_id,
                        segmentation=segmentations[instance],
                        area=area,
                        bbox=bbox,
                        iscrowd=0,
                    ))

        # Save the image into the split it was assigned to.
        if is_train == 1:
            training_examples_sofar += 1
            imgviz.io.imsave(train_out_img_file, img)
        else:
            imgviz.io.imsave(valid_out_img_file, img)

        if is_train == 1:
            train_data["images"].append(
                dict(
                    license=0,
                    url=None,
                    # handle windows backward slash issue
                    file_name=osp.relpath(
                        train_out_img_file,
                        osp.dirname(train_out_ann_file)).replace("\\", '/'),
                    height=img.shape[0],
                    width=img.shape[1],
                    date_captured=None,
                    id=image_id,
                ))
        else:
            valid_data["images"].append(
                dict(
                    license=0,
                    url=None,
                    file_name=osp.relpath(
                        valid_out_img_file,
                        osp.dirname(valid_out_ann_file)).replace("\\", '/'),
                    height=img.shape[0],
                    width=img.shape[1],
                    date_captured=None,
                    id=image_id,
                ))

        # Optional semantic mask PNG per image.
        if save_mask and vis:
            lbl, _ = labelme.utils.shapes_to_label(img.shape,
                                                   label_file.shapes,
                                                   class_name_to_id)
            if is_train == 1:
                out_mask_file = osp.join(train_mask_dir, base + '_mask.png')
            else:
                out_mask_file = osp.join(valid_mask_dir, base + '_mask.png')
            labelme.utils.lblsave(out_mask_file, lbl)

        if vis:
            # NOTE(review): raises if no mask survives the class filter —
            # assumes every image has at least one known-class shape.
            labels, captions, masks = zip(*[(class_name_to_id[cnm], cnm, msk)
                                            for (cnm, gid), msk in masks.items()
                                            if cnm in class_name_to_id])
            viz = imgviz.instances2rgb(
                image=img,
                labels=labels,
                masks=masks,
                captions=captions,
                font_size=15,
                line_width=2,
            )
            if is_train:
                out_viz_file = osp.join(output_annotated_dir, "train",
                                        "Visualization", base + ".jpg")
            else:
                out_viz_file = osp.join(output_annotated_dir, "valid",
                                        "Visualization", base + ".jpg")
            imgviz.io.imsave(out_viz_file, viz)

    with open(train_out_ann_file, "w") as f:
        json.dump(train_data, f)
    with open(valid_out_ann_file, "w") as f:
        json.dump(valid_data, f)

    # create a data.yaml config file
    categories = []
    for c in train_data["categories"]:
        # exclude backgroud with id 0
        if not c['id'] == 0:
            categories.append(c['name'])

    data_yaml = Path(f"{output_annotated_dir}/data.yaml")
    names = list(categories)
    input_annotated_dir_name = os.path.basename(input_annotated_dir)
    output_annotated_dir_name = os.path.basename(output_annotated_dir)

    # dataset folder is in same dir as the yolov5 folder
    # NOTE(review): the leading whitespace inside these YAML lines may have
    # been altered by formatting — YAML requires consistent indentation for
    # the nested keys; verify the emitted file parses.
    with open(data_yaml, 'w') as dy:
        dy.write(f"DATASET:\n")
        dy.write(f" name: '{input_annotated_dir_name}'\n")
        dy.write(
            f""" train_info: '{data_yaml.parent /"train"/"annotations.json"}'\n"""
        )
        dy.write(f""" train_images: '{data_yaml.parent /"train"}'\n""")
        dy.write(
            f""" valid_info: '{data_yaml.parent /"valid"/"annotations.json"}'\n"""
        )
        dy.write(f""" valid_images: '{data_yaml.parent /"valid"}'\n""")
        dy.write(f" class_names: {names}\n")
        dy.write(f"YOLACT:\n")
        dy.write(f" name: '{input_annotated_dir_name}'\n")
        dy.write(f" dataset: 'dataset_{input_annotated_dir_name}_coco'\n")
        dy.write(f" max_size: 512\n")

    print('Done.')
    print("All: ", class_instance_counter)
    print("Training set: ", train_class_instance_counter)
def main():
    """CLI: convert labelme JSONs into VOC-style class AND instance
    segmentation outputs (npy + PNG + optional visualizations)."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("input_dir", help="input annotated directory")
    parser.add_argument("output_dir", help="output dataset directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument(
        "--noviz", help="no visualization", action="store_true"
    )
    args = parser.parse_args()

    # Ignore output directory existance check
    '''
    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    '''
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    os.makedirs(osp.join(args.output_dir, "masks"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClass"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClassPNG"))
    if not args.noviz:
        os.makedirs(
            osp.join(args.output_dir, "SegmentationClassVisualization")
        )
    os.makedirs(osp.join(args.output_dir, "SegmentationObject"))
    os.makedirs(osp.join(args.output_dir, "SegmentationObjectPNG"))
    if not args.noviz:
        os.makedirs(
            osp.join(args.output_dir, "SegmentationObjectVisualization")
        )
    print("Creating dataset:", args.output_dir)

    # Labels file convention: line 1 is __ignore__ (id -1, skipped),
    # line 2 is _background_ (id 0); remaining lines are real classes.
    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        elif class_id == 0:
            assert class_name == "_background_"
        class_names.append(class_name)
    class_names = tuple(class_names)
    print("class_names:", class_names)

    out_class_names_file = osp.join(args.output_dir, "class_names.txt")
    with open(out_class_names_file, "w") as f:
        f.writelines("\n".join(class_names))
    print("Saved class_names:", out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, "*.json")):
        print("Generating dataset from:", filename)
        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")
        out_cls_file = osp.join(
            args.output_dir, "SegmentationClass", base + ".npy"
        )
        out_clsp_file = osp.join(
            args.output_dir, "SegmentationClassPNG", base + ".png"
        )
        if not args.noviz:
            out_clsv_file = osp.join(
                args.output_dir,
                "SegmentationClassVisualization",
                base + ".jpg",
            )
        out_ins_file = osp.join(
            args.output_dir, "SegmentationObject", base + ".npy"
        )
        out_insp_file = osp.join(
            args.output_dir, "SegmentationObjectPNG", base + ".png"
        )
        if not args.noviz:
            out_insv_file = osp.join(
                args.output_dir,
                "SegmentationObjectVisualization",
                base + ".jpg",
            )

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)

        # NOTE(review): stock labelme.utils.shapes_to_label has signature
        # (img_shape, shapes, label_name_to_value); passing `filename` as an
        # extra positional argument here would raise TypeError unless this
        # project ships a patched version — confirm which implementation is
        # imported.
        cls, ins = labelme.utils.shapes_to_label(
            filename,
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        ins[cls == -1] = 0  # ignore it.

        # class label
        labelme.utils.lblsave(out_clsp_file, cls)
        np.save(out_cls_file, cls)
        if not args.noviz:
            clsv = imgviz.label2rgb(
                label=cls,
                img=imgviz.rgb2gray(img),
                label_names=class_names,
                font_size=15,
                loc="rb",
            )
            imgviz.io.imsave(out_clsv_file, clsv)

        # instance label
        labelme.utils.lblsave(out_insp_file, ins)
        np.save(out_ins_file, ins)
        if not args.noviz:
            instance_ids = np.unique(ins)
            # Instances are captioned by their numeric id.
            instance_names = [str(i) for i in range(max(instance_ids) + 1)]
            insv = imgviz.label2rgb(
                label=ins,
                img=imgviz.rgb2gray(img),
                label_names=instance_names,
                font_size=15,
                loc="rb",
            )
            imgviz.io.imsave(out_insv_file, insv)
def main():
    """Convert a directory of labelme JSON files into a Pascal-VOC-style
    detection dataset: JPEGImages/, Annotations/ (XML bounding boxes) and an
    optional AnnotationsVisualization/ directory.

    Only rectangle shapes are exported; other shape types are skipped with a
    message.  CLI: input_dir output_dir --labels LABELS [--noviz]
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input_dir', help='input annotated directory')
    parser.add_argument('output_dir', help='output dataset directory')
    parser.add_argument('--labels', help='labels file', required=True)
    parser.add_argument('--noviz', help='no visualization',
                        action='store_true')
    args = parser.parse_args()

    # Refuse to clobber an existing dataset.
    if osp.exists(args.output_dir):
        print('Output directory already exists:', args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, 'JPEGImages'))
    os.makedirs(osp.join(args.output_dir, 'Annotations'))
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir, 'AnnotationsVisualization'))
    print('Creating dataset:', args.output_dir)

    # Labels file: line 0 "__ignore__" (id -1), line 1 "_background_" (id 0),
    # then one class per line.  class_names excludes "__ignore__".
    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == '__ignore__'
            continue
        elif class_id == 0:
            assert class_name == '_background_'
        class_names.append(class_name)
    class_names = tuple(class_names)
    print('class_names:', class_names)
    out_class_names_file = osp.join(args.output_dir, 'class_names.txt')
    with open(out_class_names_file, 'w') as f:
        f.writelines('\n'.join(class_names))
    print('Saved class_names:', out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, '*.json')):
        print('Generating dataset from:', filename)
        label_file = labelme.LabelFile(filename=filename)
        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, 'JPEGImages', base + '.jpg')
        out_xml_file = osp.join(args.output_dir, 'Annotations', base + '.xml')
        if not args.noviz:
            out_viz_file = osp.join(args.output_dir,
                                    'AnnotationsVisualization',
                                    base + '.jpg')

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)

        # Skeleton of the VOC annotation XML; empty elements are placeholders
        # kept for VOC tooling compatibility.
        maker = lxml.builder.ElementMaker()
        xml = maker.annotation(
            maker.folder(),
            maker.filename(base + '.jpg'),
            maker.database(),    # e.g., The VOC2007 Database
            maker.annotation(),  # e.g., Pascal VOC2007
            maker.image(),       # e.g., flickr
            maker.size(
                maker.height(str(img.shape[0])),
                maker.width(str(img.shape[1])),
                maker.depth(str(img.shape[2])),
            ),
            maker.segmented(),
        )

        bboxes = []
        labels = []
        for shape in label_file.shapes:
            # Detection export handles rectangles only.
            if shape['shape_type'] != 'rectangle':
                print('Skipping shape: label={label}, shape_type={shape_type}'.
                      format(**shape))
                continue

            class_name = shape['label']
            class_id = class_names.index(class_name)

            (xmin, ymin), (xmax, ymax) = shape['points']
            # swap if min is larger than max.
            xmin, xmax = sorted([xmin, xmax])
            ymin, ymax = sorted([ymin, ymax])

            # imgviz expects boxes as (ymin, xmin, ymax, xmax).
            bboxes.append([ymin, xmin, ymax, xmax])
            labels.append(class_id)

            xml.append(
                maker.object(
                    maker.name(shape['label']),
                    maker.pose(),
                    maker.truncated(),
                    maker.difficult(),
                    maker.bndbox(
                        maker.xmin(str(xmin)),
                        maker.ymin(str(ymin)),
                        maker.xmax(str(xmax)),
                        maker.ymax(str(ymax)),
                    ),
                ))

        if not args.noviz:
            captions = [class_names[label] for label in labels]
            viz = imgviz.instances2rgb(
                image=img,
                labels=labels,
                bboxes=bboxes,
                captions=captions,
                font_size=15,
            )
            imgviz.io.imsave(out_viz_file, viz)

        with open(out_xml_file, 'wb') as f:
            f.write(lxml.etree.tostring(xml, pretty_print=True))
def do_semantic_segmentation_voc(self):
    """Export the annotated directory as a VOC-style semantic-segmentation
    dataset: JPEGImages/, SegmentationClass/ (.npy), SegmentationClassPNG/
    and an optional SegmentationClassVisualization/ directory.

    Paths come from the dialog's line edits; returns silently if any is
    empty, and warns via QMessageBox if the output directory is not empty.
    """
    output_dir = self.output_directory.line_edit.text()
    input_dir = self.input_directory.line_edit.text()
    labels = self.labels_file.line_edit.text()
    # Checkbox means "produce visualization"; invert to get the no-viz flag.
    noviz = not self.output_visualization.isChecked()
    if '' in [output_dir, input_dir, labels]:
        return
    if len(os.listdir(output_dir)) > 0:
        diag = QMessageBox()
        diag.setText("Output directory must be empty")
        diag.exec()
        return
    os.makedirs(osp.join(output_dir, "JPEGImages"))
    os.makedirs(osp.join(output_dir, "SegmentationClass"))
    os.makedirs(osp.join(output_dir, "SegmentationClassPNG"))
    if not noviz:
        os.makedirs(
            osp.join(output_dir, "SegmentationClassVisualization")
        )
    class_names, class_name_to_id = self.retrieve_labels(labels)
    out_class_names_file = osp.join(output_dir, "class_names.txt")
    with open(out_class_names_file, "w") as f:
        f.writelines("\n".join(class_names))
    for filename in glob.glob(osp.join(input_dir, "*.json")):
        label_file = labelme.LabelFile(filename=filename)
        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(output_dir, "JPEGImages", base + ".jpg")
        out_lbl_file = osp.join(
            output_dir, "SegmentationClass", base + ".npy"
        )
        out_png_file = osp.join(
            output_dir, "SegmentationClassPNG", base + ".png"
        )
        if not noviz:
            out_viz_file = osp.join(
                output_dir,
                "SegmentationClassVisualization",
                base + ".jpg",
            )
        # Write the embedded image bytes verbatim (no re-encoding).
        with open(out_img_file, "wb") as f:
            f.write(label_file.imageData)
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        # shapes_to_label returns (class_label, instance_label); only the
        # class label is needed for semantic segmentation.
        lbl, _ = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        labelme.utils.lblsave(out_png_file, lbl)
        np.save(out_lbl_file, lbl)
        if not noviz:
            viz = imgviz.label2rgb(
                label=lbl,
                img=imgviz.rgb2gray(img),
                font_size=15,
                label_names=class_names,
                loc="rb",
            )
            imgviz.io.imsave(out_viz_file, viz)
    self.hide()
def do_instance_segmentation_coco(self):
    """Export the annotated directory as a COCO instance-segmentation
    dataset: JPEGImages/, annotations.json and an optional Visualization/
    directory.

    Paths come from the dialog's line edits; returns silently if any is
    empty, and warns via QMessageBox if the output directory is not empty
    or pycocotools is missing.
    """
    try:
        import pycocotools.mask
    except ImportError:
        diag = QMessageBox()
        diag.setText("Please install pycocotools:\n\n pip install pycocotools\n")
        diag.exec()
        return
    output_dir = self.output_directory.line_edit.text()
    input_dir = self.input_directory.line_edit.text()
    labels = self.labels_file.line_edit.text()
    # Checkbox means "produce visualization"; invert to get the no-viz flag.
    noviz = not self.output_visualization.isChecked()
    if '' in [output_dir, input_dir, labels]:
        return
    if len(os.listdir(output_dir)) > 0:
        diag = QMessageBox()
        diag.setText("Output directory must be empty")
        diag.exec()
        return
    os.makedirs(osp.join(output_dir, "JPEGImages"))
    if not noviz:
        os.makedirs(osp.join(output_dir, "Visualization"))
    print("Creating dataset:", output_dir)

    now = datetime.datetime.now()
    # COCO top-level skeleton; the commented lists document the keys each
    # entry will carry.
    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(url=None, id=0, name=None, )],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    # BUG FIX: readlines() keeps the trailing "\n", so remove('__ignore__')
    # and remove('_background_') never matched, and lines like
    # "__ignore__\n" re-entered the list and became real COCO categories.
    # Strip each line first so the dedup below works on clean names.
    labels = [line.strip() for line in open(labels).readlines()]
    # ensure that the labels start with ignore and background
    try:
        labels.remove('__ignore__')
    except ValueError:
        pass
    try:
        labels.remove('_background_')
    except ValueError:
        pass
    new_labels = ['__ignore__', '_background_']
    new_labels.extend(labels)

    # Category ids start at 0 for "_background_"; "__ignore__" maps to -1
    # and is excluded from the categories list.
    class_name_to_id = {}
    for i, line in enumerate(new_labels):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        class_name_to_id[class_name] = class_id
        data["categories"].append(
            dict(supercategory=None, id=class_id, name=class_name, )
        )

    out_ann_file = osp.join(output_dir, "annotations.json")
    label_files = glob.glob(osp.join(input_dir, "*.json"))
    for image_id, filename in enumerate(label_files):
        print("Generating dataset from:", filename)
        label_file = labelme.LabelFile(filename=filename)
        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(output_dir, "JPEGImages", base + ".jpg")
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        data["images"].append(
            dict(
                license=0,
                url=None,
                file_name=osp.relpath(out_img_file,
                                      osp.dirname(out_ann_file)),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            )
        )
        # Shapes sharing a (label, group_id) pair are merged into one
        # instance; shapes without a group get a unique uuid.
        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(
                img.shape[:2], points, shape_type
            )
            if group_id is None:
                group_id = uuid.uuid1()
            instance = (label, group_id)
            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask
            if shape_type == "rectangle":
                # Expand the 2-point rectangle to a 4-corner polygon.
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            else:
                points = np.asarray(points).flatten().tolist()
            segmentations[instance].append(points)
        segmentations = dict(segmentations)
        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue
            cls_id = class_name_to_id[cls_name]
            # RLE-encode the binary mask to get area and bbox.
            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()
            data["annotations"].append(
                dict(
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                )
            )
        if not noviz:
            try:
                # zip(*[]) raises ValueError for images without known
                # classes; the except below skips visualization for those.
                labels, captions, masks = zip(
                    *[
                        (class_name_to_id[cnm], cnm, msk)
                        for (cnm, gid), msk in masks.items()
                        if cnm in class_name_to_id
                    ]
                )
                viz = imgviz.instances2rgb(
                    image=img,
                    labels=labels,
                    masks=masks,
                    captions=captions,
                    font_size=15,
                    line_width=2,
                )
                out_viz_file = osp.join(
                    output_dir, "Visualization", base + ".jpg"
                )
                imgviz.io.imsave(out_viz_file, viz)
            except ValueError:
                print(f'Failed to create visualization for {base}.jpg')
    with open(out_ann_file, "w") as f:
        json.dump(data, f)
    self.hide()
def do_instance_segmentation_voc(self):
    """Export the annotated directory as a VOC-style dataset with both
    semantic (SegmentationClass*) and instance (SegmentationObject*) labels,
    plus optional visualization directories.

    Paths come from the dialog's line edits; returns silently if any is
    empty, and warns via QMessageBox if the output directory is not empty.
    """
    output_dir = self.output_directory.line_edit.text()
    input_dir = self.input_directory.line_edit.text()
    labels = self.labels_file.line_edit.text()
    # Checkbox means "produce visualization"; invert to get the no-viz flag.
    noviz = not self.output_visualization.isChecked()
    if '' in [output_dir, input_dir, labels]:
        return
    if len(os.listdir(output_dir)) > 0:
        diag = QMessageBox()
        diag.setText("Output directory must be empty")
        diag.exec()
        return
    os.makedirs(osp.join(output_dir, "JPEGImages"))
    os.makedirs(osp.join(output_dir, "SegmentationClass"))
    os.makedirs(osp.join(output_dir, "SegmentationClassPNG"))
    if not noviz:
        os.makedirs(
            osp.join(output_dir, "SegmentationClassVisualization")
        )
    os.makedirs(osp.join(output_dir, "SegmentationObject"))
    os.makedirs(osp.join(output_dir, "SegmentationObjectPNG"))
    if not noviz:
        os.makedirs(
            osp.join(output_dir, "SegmentationObjectVisualization")
        )
    class_names, class_name_to_id = self.retrieve_labels(labels)
    out_class_names_file = osp.join(output_dir, "class_names.txt")
    with open(out_class_names_file, "w") as f:
        f.writelines("\n".join(class_names))
    for filename in glob.glob(osp.join(input_dir, "*.json")):
        label_file = labelme.LabelFile(filename=filename)
        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(output_dir, "JPEGImages", base + ".jpg")
        out_cls_file = osp.join(
            output_dir, "SegmentationClass", base + ".npy"
        )
        out_clsp_file = osp.join(
            output_dir, "SegmentationClassPNG", base + ".png"
        )
        if not noviz:
            out_clsv_file = osp.join(
                output_dir,
                "SegmentationClassVisualization",
                base + ".jpg",
            )
        out_ins_file = osp.join(
            output_dir, "SegmentationObject", base + ".npy"
        )
        out_insp_file = osp.join(
            output_dir, "SegmentationObjectPNG", base + ".png"
        )
        if not noviz:
            out_insv_file = osp.join(
                output_dir,
                "SegmentationObjectVisualization",
                base + ".jpg",
            )
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        # cls: per-pixel class ids; ins: per-pixel instance ids.
        cls, ins = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        ins[cls == -1] = 0  # ignore it.
        # class label
        labelme.utils.lblsave(out_clsp_file, cls)
        np.save(out_cls_file, cls)
        if not noviz:
            clsv = imgviz.label2rgb(
                label=cls,
                img=imgviz.rgb2gray(img),
                label_names=class_names,
                font_size=15,
                loc="rb",
            )
            imgviz.io.imsave(out_clsv_file, clsv)
        # instance label
        labelme.utils.lblsave(out_insp_file, ins)
        np.save(out_ins_file, ins)
        if not noviz:
            # Name each instance by its id for the visualization legend.
            instance_ids = np.unique(ins)
            instance_names = [str(i) for i in range(max(instance_ids) + 1)]
            insv = imgviz.label2rgb(
                label=ins,
                img=imgviz.rgb2gray(img),
                label_names=instance_names,
                font_size=15,
                loc="rb",
            )
            imgviz.io.imsave(out_insv_file, insv)
    self.hide()
def main():
    """Convert a `raw_labelme` directory with train/val partitions into
    COCO-format `instances_{train,val}2014.json` files plus per-partition
    image directories under the module-level OUT_DIR / ANNOT_DIR.

    Expects <raw_labelme_dir>/labels.txt (first line "__ignore__"),
    <partition>_images/ and <partition>_annotations/ for each partition.
    Category ids start at 1 (line index in labels.txt).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('raw_labelme_dir',
                        help='Path to input `raw_labelme` directory')
    args = parser.parse_args()

    # Refuse to clobber an existing dataset.
    if os.path.exists(OUT_DIR):
        print('Output directory already exists:' + OUT_DIR)
        sys.exit(1)
    labels_filepath = os.path.join(args.raw_labelme_dir, 'labels.txt')
    if not os.path.exists(labels_filepath):
        print('Labels file does not exist')
        sys.exit(1)
    print('Creating dataset:', OUT_DIR)
    os.makedirs(OUT_DIR)
    os.makedirs(ANNOT_DIR)
    partitions = ["train", "val"]
    now = datetime.datetime.now()
    # Run for "train" and "val" partitions.
    for partition in partitions:
        IMG_DIR_IN = os.path.join(args.raw_labelme_dir,
                                  f"{partition}_images")
        IMG_DIR_OUT = os.path.join(OUT_DIR, f"{partition}2014")
        ANN_DIR_IN = os.path.join(args.raw_labelme_dir,
                                  f"{partition}_annotations")
        ANN_FILEPATH = os.path.join(ANNOT_DIR,
                                    f"instances_{partition}2014.json")
        os.makedirs(IMG_DIR_OUT)
        class_name_to_id = {}
        # Fresh COCO skeleton per partition; the commented lists document
        # the keys each entry will carry.
        data = dict(
            info=dict(
                description=None,
                url=None,
                version=None,
                year=now.year,
                contributor=None,
                date_created=now.strftime('%Y-%m-%d %H:%M:%S.%f'),
            ),
            licenses=[dict(
                url=None,
                id=0,
                name=None,
            )],
            images=[
                # license, url, file_name, height, width, date_captured, id
            ],
            type='instances',
            annotations=[
                # segmentation, area, iscrowd, image_id, bbox, category_id, id
            ],
            categories=[
                # supercategory, id, name
            ],
        )
        # Line 0 must be "__ignore__"; following lines become categories
        # with id == their line index (so ids start at 1).
        for class_id, line in enumerate(open(labels_filepath).readlines()):
            class_name = line.strip()
            if class_id == 0:
                assert class_name == '__ignore__'
                continue
            class_name_to_id[class_name] = class_id
            data['categories'].append(dict(
                supercategory=None,
                id=class_id,
                name=class_name,
            ))
        label_files = glob.glob(os.path.join(ANN_DIR_IN, '*.json'))
        for image_id, filename in enumerate(label_files):
            print('Generating dataset from:', filename)
            label_file = labelme.LabelFile(filename=filename)
            base = os.path.splitext(os.path.basename(filename))[0]
            out_img_file = os.path.join(IMG_DIR_OUT, base + ".jpg")
            img = labelme.utils.img_data_to_arr(label_file.imageData)
            PIL.Image.fromarray(img).save(out_img_file)
            data['images'].append(dict(
                license=0,
                url=None,
                file_name=os.path.basename(out_img_file),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            ))
            # Shapes sharing a (label, group_id) pair merge into one
            # instance; shapes without a group get a unique uuid.
            masks = {}  # for area
            segmentations = collections.defaultdict(list)  # for segmentation
            for shape in label_file.shapes:
                points = shape['points']
                label = shape['label']
                group_id = shape.get('group_id')
                shape_type = shape.get('shape_type')
                mask = labelme.utils.shape_to_mask(
                    img.shape[:2], points, shape_type
                )
                if group_id is None:
                    group_id = uuid.uuid1()
                instance = (label, group_id)
                if instance in masks:
                    masks[instance] = masks[instance] | mask
                else:
                    masks[instance] = mask
                points = np.asarray(points).flatten().tolist()
                segmentations[instance].append(points)
            segmentations = dict(segmentations)
            for instance, mask in masks.items():
                cls_name, group_id = instance
                if cls_name not in class_name_to_id:
                    continue
                cls_id = class_name_to_id[cls_name]
                # RLE-encode the binary mask to get area and bbox.
                mask = np.asfortranarray(mask.astype(np.uint8))
                mask = pycocotools.mask.encode(mask)
                area = float(pycocotools.mask.area(mask))
                bbox = pycocotools.mask.toBbox(mask).flatten().tolist()
                data['annotations'].append(dict(
                    id=len(data['annotations']),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                ))
        with open(ANN_FILEPATH, 'w') as f:
            json.dump(data, f)
def main():
    """Convert one or more labelme directories into COCO-format annotation
    files with a train/test split: _coco.json (all), _train.json and
    _test.json, plus JPEGImages/ and an optional Visualization/ directory.

    If --input_dir is omitted, every subdirectory of the current working
    directory is used, sorted by the digits in its name.  Images without
    any annotated instance are saved but excluded from the JSON files.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--input_dir", nargs='*',
                        help="input annotated directory")
    parser.add_argument("--output_dir", help="output dataset directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument("--noviz", help="no visualization",
                        action="store_true")
    parser.add_argument('--split', type=float, nargs='?', default=0.9,
                        help="Train set size; a number in (0, 1)")
    args = parser.parse_args()

    # Refuse to clobber an existing dataset.
    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir, "Visualization"))
    print("Creating dataset:", args.output_dir)

    now = datetime.datetime.now()
    # COCO skeleton shared (via deepcopy) by the all/train/test outputs.
    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(
            url=None,
            id=0,
            name=None,
        )],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )
    # Labels file: line 0 "__ignore__" (id -1, skipped); remaining lines
    # become categories with ids starting at 0.
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        class_name_to_id[class_name] = class_id
        data["categories"].append(
            dict(
                supercategory=None,
                id=class_id,
                name=class_name,
            ))

    out_ann_file_all = osp.join(args.output_dir, "_coco.json")
    out_ann_file_train = osp.join(args.output_dir, "_train.json")
    out_ann_file_test = osp.join(args.output_dir, "_test.json")
    print("working directory is:", os.getcwd())
    label_files_per_dir = []  # label_files is a list of lists
    if args.input_dir is None:
        # No dirs given: walk the cwd and sort subdirs by the digits in
        # their names.  NOTE(review): dirs without any digit make
        # int(...) raise ValueError — confirm inputs always carry digits.
        args.input_dir = [x[0] for x in os.walk(os.getcwd())]
        args.input_dir.sort(key=lambda f: int(regex.sub('\D', '', f)))
    print("input dir(s) are:", args.input_dir)
    if isinstance(args.input_dir, list):
        # multiple dirs were given:
        for dir in args.input_dir:
            label_files_per_dir.append(glob.glob(osp.join(dir, "*.json")))
    else:
        label_files_per_dir = [glob.glob(osp.join(args.input_dir,
                                                  "*.json"))]

    data_train = copy.deepcopy(data)
    data_test = copy.deepcopy(data)
    image_id = 0
    for label_files in label_files_per_dir:
        # train, test split
        if len(label_files) > 0:
            train, test = train_test_split(label_files,
                                           train_size=args.split)
            for set_name, split_set in [("train", train), ("test", test)]:
                print("generating set:", set_name)
                for filename in split_set:
                    print("Generating dataset from:", filename)
                    label_file = labelme.LabelFile(filename=filename)
                    base = osp.splitext(osp.basename(filename))[0]
                    out_img_file = osp.join(args.output_dir, "JPEGImages",
                                            base + ".jpg")
                    img = labelme.utils.img_data_to_arr(
                        label_file.imageData)
                    imgviz.io.imsave(out_img_file, img)
                    image_data = dict(
                        license=0,
                        url=None,
                        file_name=osp.relpath(
                            out_img_file, osp.dirname(out_ann_file_all)),
                        height=img.shape[0],
                        width=img.shape[1],
                        date_captured=None,
                        id=image_id,
                    )
                    # Shapes sharing a (label, group_id) pair merge into
                    # one instance; ungrouped shapes get a unique uuid.
                    masks = {}  # for area
                    segmentations = collections.defaultdict(
                        list)  # for segmentation
                    for shape in label_file.shapes:
                        points = shape["points"]
                        label = shape["label"]
                        group_id = shape.get("group_id")
                        shape_type = shape.get("shape_type", "polygon")
                        mask = labelme.utils.shape_to_mask(
                            img.shape[:2], points, shape_type)
                        if group_id is None:
                            group_id = uuid.uuid1()
                        instance = (label, group_id)
                        if instance in masks:
                            masks[instance] = masks[instance] | mask
                        else:
                            masks[instance] = mask
                        if shape_type == "rectangle":
                            # Expand the 2-point rectangle to 4 corners.
                            (x1, y1), (x2, y2) = points
                            x1, x2 = sorted([x1, x2])
                            y1, y2 = sorted([y1, y2])
                            points = [x1, y1, x2, y1, x2, y2, x1, y2]
                        else:
                            points = np.asarray(points).flatten().tolist()
                        segmentations[instance].append(points)
                    segmentations = dict(segmentations)
                    if len(masks.keys()) > 0:
                        # image contains annotations, so add it.
                        data["images"].append(image_data)
                        if set_name == "train":
                            data_train["images"].append(image_data)
                        if set_name == "test":
                            data_test["images"].append(image_data)
                        for instance, mask in masks.items():
                            cls_name, group_id = instance
                            if cls_name not in class_name_to_id:
                                continue
                            cls_id = class_name_to_id[cls_name]
                            # RLE-encode the mask to get area and bbox.
                            mask = np.asfortranarray(
                                mask.astype(np.uint8))
                            mask = pycocotools.mask.encode(mask)
                            area = float(pycocotools.mask.area(mask))
                            bbox = pycocotools.mask.toBbox(
                                mask).flatten().tolist()
                            annotation_data = dict(
                                id=len(data["annotations"]),
                                image_id=image_id,
                                category_id=cls_id,
                                segmentation=segmentations[instance],
                                area=area,
                                bbox=bbox,
                                iscrowd=0,
                            )
                            data["annotations"].append(annotation_data)
                            if set_name == "train":
                                data_train["annotations"].append(
                                    annotation_data)
                            if set_name == "test":
                                data_test["annotations"].append(
                                    annotation_data)
                        if not args.noviz:
                            labels, captions, masks = zip(
                                *[(class_name_to_id[cnm], cnm, msk)
                                  for (cnm, gid), msk in masks.items()
                                  if cnm in class_name_to_id])
                            viz = imgviz.instances2rgb(
                                image=img,
                                labels=labels,
                                masks=masks,
                                captions=captions,
                                font_size=15,
                                line_width=2,
                            )
                            out_viz_file = osp.join(args.output_dir,
                                                    "Visualization",
                                                    base + ".jpg")
                            imgviz.io.imsave(out_viz_file, viz)
                        # increment image counter
                        image_id += 1
    with open(out_ann_file_all, "w") as f:
        json.dump(data, f)
    with open(out_ann_file_train, "w") as f:
        json.dump(data_train, f)
    with open(out_ann_file_test, "w") as f:
        json.dump(data_test, f)
def main():
    """Scan a directory of labelme JSON files and write, for each image,
    the per-instance masks and a count of foreground (value-255) pixels
    of the first mask's rows into 'annotation_mask.txt'.

    NOTE(review): the counting logic iterates rows of masks[0] (the first
    instance mask) only, and the whole write path is skipped when --noviz
    is given — confirm this is the intended behavior.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("input_dir", help="input annotated directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument(
        "--noviz", help="no visualization", action="store_true"
    )
    args = parser.parse_args()
    now = datetime.datetime.now()
    ##########################
    # Output report; closed explicitly at the end of the function.
    f = open('annotation_mask.txt', 'w')
    count = 0
    # Labels file: line 0 "__ignore__" (id -1, skipped); remaining lines
    # map to ids starting at 0.
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        class_name_to_id[class_name] = class_id
    label_files = glob.glob(osp.join(args.input_dir, "*.json"))
    for image_id, filename in enumerate(label_files):
        label_file = labelme.LabelFile(filename=filename)
        base = osp.splitext(osp.basename(filename))[0]
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        # Shapes sharing a (label, group_id) pair merge into one instance;
        # ungrouped shapes get a unique uuid.
        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(
                img.shape[:2], points, shape_type
            )
            if group_id is None:
                group_id = uuid.uuid1()
            instance = (label, group_id)
            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask
            if shape_type == "rectangle":
                # Expand the 2-point rectangle to a 4-corner polygon.
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            else:
                points = np.asarray(points).flatten().tolist()
            segmentations[instance].append(points)
        segmentations = dict(segmentations)
        # RLE-encode each known-class mask (result unused beyond this loop).
        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue
            cls_id = class_name_to_id[cls_name]
            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
        if not args.noviz:
            # Rebinds `masks` to a tuple of boolean arrays (known classes
            # only); raises ValueError if the image has none.
            labels, captions, masks = zip(
                *[
                    (class_name_to_id[cnm], cnm, msk)
                    for (cnm, gid), msk in masks.items()
                    if cnm in class_name_to_id
                ]
            )
            ###########333
            f.write(str(base))
            f.write("\n")
            f.write(str((masks)))
            f.write("\n")
            # Map True(1) pixels to 255 row-by-row in the FIRST mask only.
            mask = [np.where(m == 1, 255, m) for m in masks[0]]
            # f.write(str((mask)))
            # Count foreground pixels and append the total for this image.
            for i in mask:
                for k in i:
                    if k == 255:
                        count = count + 1
            f.write(str(count))
            f.write("\n\n")
            count = 0
            ##########
    f.close()
def Labelme2Yolo(ym='det'):
    """Convert a labelme-annotated directory into a YOLO detection dataset:
    images/, labels/ (one normalized `cls cx cy w h` line per instance),
    an optional Viz/ directory, and a `<ym>.yaml` dataset config.

    Args:
        ym: basename (without extension) of the YAML config to write.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input_dir', help='input annotated directory')
    parser.add_argument('output_dir', help='output dataset directory')
    parser.add_argument('--labels', help='labels file', required=True)
    parser.add_argument('--viz', help='visualize', action='store_true')
    args = parser.parse_args()
    output_dir = args.output_dir
    # Refuse to clobber an existing dataset.
    assert not osp.exists(output_dir)
    os.makedirs(output_dir)
    if args.viz:
        os.makedirs(osp.join(output_dir, 'Viz'))
    os.makedirs(osp.join(output_dir, 'images'))
    os.makedirs(osp.join(output_dir, 'labels'))
    print('Creating dataset:', output_dir)
    # Lines starting with '_' (e.g. "__ignore__", "_background_") are
    # skipped and shift subsequent ids down by one via `ofs`.
    cls_to_id = {}
    ofs = 0
    for i, line in enumerate(open(args.labels).readlines()):
        cls_name = line.strip()
        if cls_name.startswith('_'):
            ofs = 1
            continue
        class_id = i - ofs  # start with -1 or 0
        cls_to_id[cls_name] = class_id
    label_files = glob(osp.join(args.input_dir, '*.json'))
    for image_id, jsonfile in enumerate(tqdm(label_files)):
        label_file = labelme.LabelFile(filename=jsonfile)
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        base = osp.splitext(osp.basename(jsonfile))[0] + '.jpg'
        dst_img = osp.join(output_dir, 'images', base).replace('\\', '/')
        # Reuse the referenced .jpg directly when possible; otherwise
        # re-encode from the embedded image data.
        if label_file.imagePath.endswith('.jpg'):
            # copy(osp.join(args.input_dir, label_file.imagePath), dst_img)
            copyfile(osp.join(args.input_dir, label_file.imagePath),
                     dst_img)
        else:
            imgviz.io.imsave(dst_img, img)
            # Image.fromarray(img).save(dst_img)
        # Shapes sharing a (label, group_id) pair merge into one instance;
        # ungrouped shapes get a unique uuid.
        masks = {}  # for area
        for shape in label_file.shapes:
            points = shape['points']
            label = shape['label']
            group_id = shape.get('group_id')
            shape_type = shape.get('shape_type', 'polygon')
            mask = labelme.utils.shape_to_mask(img.shape[:2], points,
                                               shape_type)
            if group_id is None:
                group_id = uuid.uuid1()
            instance = (label, group_id)
            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask
        H, W = img.shape[:2]
        box = []
        res = ''
        for (cls_name, group_id), mask in masks.items():
            # RLE-encode the mask, take its bbox (x, y, w, h), convert the
            # top-left corner to the box center, then normalize by W/H.
            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            bbox = pycocotools.mask.toBbox(mask).ravel()
            bbox[0] += bbox[2] / 2
            bbox[1] += bbox[3] / 2
            bbox[::2] /= W
            bbox[1::2] /= H
            box += [bbox]
            if cls_name not in cls_to_id:
                continue
            cls_id = cls_to_id[cls_name]  # top_left->center
            res += '%d %.6f %.6f %.6f %.6f\n' % (cls_id, *bbox)
        dst_txt = osp.join(output_dir, 'labels', base[:-4])
        with open(dst_txt + '.txt', 'w') as f:
            f.write(res)
        if args.viz and box:
            # center->top_left
            x, y, w, h = np.array(box).transpose()
            x -= w / 2
            y -= h / 2
            # imgviz expects (ymin, xmin, ymax, xmax) in pixels.
            box = np.array([y * H, x * W, (y + h) * H,
                            (x + w) * W]).transpose()
            # Unknown classes get an extra id past the known range.
            c2i = lambda x: cls_to_id[x] if x in cls_to_id else len(cls_to_id)
            lab, cap, mk = zip(*[(c2i(c), c, mk)
                                 for (c, g), mk in masks.items()])
            viz = imgviz.instances2rgb(
                image=img,
                labels=lab,
                bboxes=list(box),
                # masks=mk,
                captions=cap,
                font_size=12,
                line_width=2)
            imgviz.io.imsave(osp.join(output_dir, 'Viz', base), viz)
    # YOLO dataset config (train/val both point at the exported images).
    res = dict(train=f'../{output_dir}/images/',
               val=f'../{output_dir}/images/',
               nc=len(cls_to_id),
               names=[i for i in cls_to_id])
    with open(osp.join(output_dir, ym + '.yaml'), 'w') as f:
        yaml.dump(res, f, sort_keys=False)
def main():
    """json2coco: export labelme JSON files as JPEG images plus optional
    instance visualizations (seedling dataset variant, labelme 4.5.6 API).

    NOTE(review): this variant builds masks/segmentations per image but
    never appends to data["annotations"] nor writes the JSON — confirm
    whether the annotation-saving step was intentionally removed.
    """
    parser = argparse.ArgumentParser(description="json2coco")
    parser.add_argument("--input_dir", help="input annotated directory",
                        default="./images")
    parser.add_argument("--output_dir", help="output dataset directory",
                        default="./output")
    parser.add_argument("--labels", help="labels file",
                        default='./labels.txt')  # required=True
    parser.add_argument(
        "--noviz", help="no visualization", action="store_true")
    args = parser.parse_args()

    if not args.noviz:
        vis_dir = osp.join(args.output_dir, "Visualization")
        if not os.path.exists(vis_dir):
            os.makedirs(vis_dir)
            print("Creating Visualization:", vis_dir)
    now = datetime.datetime.now()
    # COCO-style skeleton; the commented lists document the keys each
    # entry would carry.
    data = dict(
        info=dict(
            description="seedling datasets",
            url=None,
            version="label=4.5.6",
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        # licenses=[dict(url=None, id=0, name=None,)],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )
    # Labels file: line 0 "__ignore__" (id -1), line 1 "__background__"
    # (id 0); both are skipped, remaining lines become categories.
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        if class_id == 0:
            assert class_name == "__background__"
            continue
        class_name_to_id[class_name] = class_id
        # print(class_id,class_name,'\n')
        data["categories"].append(
            # one target class + background; id=0 denotes the background
            dict(supercategory="1", id=class_id, name=class_name,)
        )
    print("categories 生成完成", '\n')
    # image ids are assigned from the enumeration of the json file list
    label_files = glob.glob(osp.join(args.input_dir, "*.json"))
    for image_id, filename in enumerate(label_files):
        print(image_id, filename)
        # print("Generating dataset from:", filename)
        label_file = labelme.LabelFile(filename=filename)
        base = osp.splitext(osp.basename(filename))[0]  # image name
        out_img_file = osp.join(args.output_dir, base + ".jpg")  # image save path
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        # Shapes sharing a (label, group_id) pair merge into one instance;
        # ungrouped shapes get a unique uuid.
        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            # shape_to_mask as located in labelme==4.5.6 (utils.shape module)
            mask = labelme.utils.shape.shape_to_mask(
                img.shape[:2], points, shape_type)
            if group_id is None:
                group_id = uuid.uuid1()
            instance = (label, group_id)
            # print(instance)
            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask
            if shape_type == "rectangle":
                # Expand the 2-point rectangle to a 4-corner polygon.
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            else:
                points = np.asarray(points).flatten().tolist()
            segmentations[instance].append(points)
        segmentations = dict(segmentations)
        if not args.noviz:
            # Rebinds labels/captions/masks to parallel tuples of known
            # classes; zip(*[]) raises ValueError if there are none.
            labels, captions, masks = zip(
                *[
                    (class_name_to_id[cnm], cnm, msk)
                    for (cnm, gid), msk in masks.items()
                    if cnm in class_name_to_id
                ]
            )
            print(labels)
            viz = imgviz.instances2rgb(
                image=img,
                labels=labels,
                masks=masks,
                captions=captions,
                font_size=15,
                line_width=2,
                alpha=1,
                # colormap=[255,0,0],
            )
            out_viz_file = osp.join(
                args.output_dir, "Visualization", base + ".jpg"
            )
            imgviz.io.imsave(out_viz_file, viz)
def main():
    """Convert a directory of labelme JSON files into a VOC-style
    semantic-segmentation dataset: JPEGImages/, SegmentationClass/ (.npy),
    SegmentationClassPNG/ and optional SegmentationClassVisualization/.

    CLI: input_dir output_dir --labels LABELS [--noviz]
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input_dir', help='input annotated directory')
    parser.add_argument('output_dir', help='output dataset directory')
    parser.add_argument('--labels', help='labels file', required=True)
    parser.add_argument('--noviz', help='no visualization',
                        action='store_true')
    args = parser.parse_args()

    # Refuse to clobber an existing dataset.
    if osp.exists(args.output_dir):
        print('Output directory already exists:', args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, 'JPEGImages'))
    os.makedirs(osp.join(args.output_dir, 'SegmentationClass'))
    os.makedirs(osp.join(args.output_dir, 'SegmentationClassPNG'))
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir,
                             'SegmentationClassVisualization'))
    print('Creating dataset:', args.output_dir)

    # Labels file: line 0 "__ignore__" (id -1), line 1 "_background_"
    # (id 0), then one class per line.
    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == '__ignore__'
            continue
        elif class_id == 0:
            assert class_name == '_background_'
        class_names.append(class_name)
    class_names = tuple(class_names)
    print('class_names:', class_names)
    out_class_names_file = osp.join(args.output_dir, 'class_names.txt')
    with open(out_class_names_file, 'w') as f:
        f.writelines('\n'.join(class_names))
    print('Saved class_names:', out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, '*.json')):
        print('Generating dataset from:', filename)
        label_file = labelme.LabelFile(filename=filename)
        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, 'JPEGImages',
                                base + '.jpg')
        out_lbl_file = osp.join(args.output_dir, 'SegmentationClass',
                                base + '.npy')
        out_png_file = osp.join(args.output_dir, 'SegmentationClassPNG',
                                base + '.png')
        if not args.noviz:
            out_viz_file = osp.join(
                args.output_dir,
                'SegmentationClassVisualization',
                base + '.jpg',
            )
        # Write the embedded image bytes verbatim (no re-encoding).
        with open(out_img_file, 'wb') as f:
            f.write(label_file.imageData)
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        # NOTE(review): assumes a labelme version where shapes_to_label
        # returns a single label array; in labelme>=4 it returns a
        # (cls, ins) tuple — confirm the pinned labelme version.
        lbl = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        labelme.utils.lblsave(out_png_file, lbl)
        np.save(out_lbl_file, lbl)
        if not args.noviz:
            viz = imgviz.label2rgb(
                label=lbl,
                img=imgviz.rgb2gray(img),
                font_size=15,
                label_names=class_names,
                loc='rb',
            )
            imgviz.io.imsave(out_viz_file, viz)
def main():
    """Convert a tree of labelme JSON files (walked recursively) into a COCO
    instance-segmentation dataset.

    Output layout under ``output_dir``:
        JPEGImages/<base>.jpg    -- decoded source images
        annotations.json         -- COCO "instances" annotations
        Visualization/<base>.jpg -- instance overlays (unless --noviz)

    For the 'pear' class every bbox is padded by a fixed border so that a
    tight box does not clip the mask when downstream consumers (e.g. YOLACT
    crop_mask during roundness measurement) crop the mask to the box; the
    padded box is clamped to the image bounds.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--input_dir",
        default=
        'C:/Users/62349/Desktop/labelme/examples/instance_segmentation/pear_4_train',
        help="input annotated directory")
    parser.add_argument(
        "--output_dir",
        default=
        'C:/Users/62349/Desktop/labelme/examples/instance_segmentation/train',
        help="output dataset directory")
    parser.add_argument(
        "--labels",
        default=
        'C:/Users/62349/Desktop/labelme/examples/instance_segmentation/labels.txt',
        help="labels file")  # label list: one class name per line
    parser.add_argument("--noviz", help="no visualization",
                        action="store_true")
    args = parser.parse_args()

    # Refuse to clobber an existing dataset.
    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir, "Visualization"))
    print("Creating dataset:", args.output_dir)

    now = datetime.datetime.now()

    # Skeleton of the COCO "instances" annotation file.
    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(
            url=None,
            id=0,
            name=None,
        )],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    # Line 0 of the labels file must be '__ignore__' (id -1); it is skipped.
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        class_name_to_id[class_name] = class_id
        data["categories"].append(
            dict(
                supercategory=None,
                id=class_id,
                name=class_name,
            ))

    out_ann_file = osp.join(args.output_dir, "annotations.json")
    # Walk the whole input tree (not just the top level) for *.json files.
    label_files = []
    for root, dirs, files in os.walk(args.input_dir):
        for file in files:
            if os.path.splitext(file)[1] == '.json':
                label_files.append(os.path.join(root, file))

    for image_id, filename in enumerate(label_files):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)
        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        data["images"].append(
            dict(
                license=0,
                url=None,
                # bare file name instead of osp.relpath(...) so the JSON is
                # portable across Windows/Unix path separators
                file_name=base + ".jpg",
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            ))

        masks = {}  # (label, group_id) -> boolean mask, for area/bbox
        segmentations = collections.defaultdict(list)  # -> polygon coords
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(img.shape[:2], points,
                                               shape_type)

            if group_id is None:
                # each ungrouped shape is its own instance
                group_id = uuid.uuid1()

            instance = (label, group_id)

            if instance in masks:
                # union all shapes that belong to the same instance
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == "rectangle":
                # expand the 2-corner rectangle into a 4-corner polygon
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            else:
                points = np.asarray(points).flatten().tolist()

            segmentations[instance].append(points)
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue  # skip classes absent from the labels file
            cls_id = class_name_to_id[cls_name]

            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            # Pad the pear bbox so a tight box does not clip the mask in
            # downstream crop_mask (roundness measurement would otherwise be
            # inaccurate when the predicted box cuts into the pear).
            # BUGFIX: the original blindly subtracted/added the border,
            # which could yield negative coordinates or a box extending past
            # the image; clamp the padded box to the image bounds.
            if cls_name == 'pear':
                border = 200
                x, y, w, h = bbox
                x0 = max(0.0, x - border)
                y0 = max(0.0, y - border)
                x1 = min(float(img.shape[1]), x + w + border)
                y1 = min(float(img.shape[0]), y + h + border)
                bbox = [x0, y0, x1 - x0, y1 - y0]

            data["annotations"].append(
                dict(
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                ))

        if not args.noviz:
            viz_items = [(class_name_to_id[cnm], cnm, msk)
                         for (cnm, gid), msk in masks.items()
                         if cnm in class_name_to_id]
            # BUGFIX: zip(*[]) raises ValueError on images that contain no
            # known-class instance; skip visualization for those images.
            if viz_items:
                labels, captions, viz_masks = zip(*viz_items)
                viz = imgviz.instances2rgb(
                    image=img,
                    labels=labels,
                    masks=viz_masks,
                    captions=captions,
                    font_size=15,
                    line_width=2,
                )
                out_viz_file = osp.join(args.output_dir, "Visualization",
                                        base + ".jpg")
                imgviz.io.imsave(out_viz_file, viz)

    with open(out_ann_file, "w") as f:
        json.dump(data, f)
def main():
    """Convert a flat directory of labelme JSON files into a COCO
    instance-segmentation dataset (JPEGImages/ + annotations.json)."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input_dir', help='input annotated directory')
    parser.add_argument('output_dir', help='output dataset directory')
    parser.add_argument('--labels', help='labels file', required=True)
    args = parser.parse_args()

    # Refuse to clobber an existing dataset.
    if osp.exists(args.output_dir):
        print('Output directory already exists:', args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, 'JPEGImages'))
    print('Creating dataset:', args.output_dir)

    now = datetime.datetime.now()

    # Skeleton of the COCO "instances" annotation file.
    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime('%Y-%m-%d %H:%M:%S.%f'),
        ),
        licenses=[dict(
            url=None,
            id=0,
            name=None,
        )],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type='instances',
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    # Line 0 of the labels file must be '__ignore__' (id -1); it is skipped.
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            assert class_name == '__ignore__'
            continue
        class_name_to_id[class_name] = class_id
        data['categories'].append(
            dict(
                supercategory=None,
                id=class_id,
                name=class_name,
            ))

    out_ann_file = osp.join(args.output_dir, 'annotations.json')
    label_files = glob.glob(osp.join(args.input_dir, '*.json'))
    for image_id, filename in enumerate(label_files):
        print('Generating dataset from:', filename)

        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, 'JPEGImages', base + '.jpg')

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        PIL.Image.fromarray(img).save(out_img_file)
        data['images'].append(
            dict(
                license=0,
                url=None,
                file_name=osp.relpath(out_img_file,
                                      osp.dirname(out_ann_file)),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            ))

        masks = {}  # (label, group_id) -> boolean mask, for area/bbox
        segmentations = collections.defaultdict(list)  # -> polygon coords
        for shape in label_file.shapes:
            points = shape['points']
            label = shape['label']
            group_id = shape.get('group_id')
            # default to 'polygon' so older JSON without shape_type works
            shape_type = shape.get('shape_type', 'polygon')
            mask = labelme.utils.shape_to_mask(img.shape[:2], points,
                                               shape_type)

            if group_id is None:
                # each ungrouped shape is its own instance
                group_id = uuid.uuid1()

            instance = (label, group_id)

            if instance in masks:
                # union all shapes that belong to the same instance
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == 'rectangle':
                # BUGFIX: a 2-point rectangle flattens to only 4 coordinates,
                # which is not a valid COCO polygon (needs >= 3 vertices);
                # emit its 4 corners instead, as the sibling converter does.
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            else:
                points = np.asarray(points).flatten().tolist()
            segmentations[instance].append(points)
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue  # skip classes absent from the labels file
            cls_id = class_name_to_id[cls_name]

            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            data['annotations'].append(
                dict(
                    id=len(data['annotations']),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                ))

    with open(out_ann_file, 'w') as f:
        json.dump(data, f)
def labelme2voc(input_dir, output_dir, annotations_file, trainval_percent,
                train_percent):
    """Convert labelme rectangle annotations into a Pascal-VOC detection
    dataset rooted at ``output_dir/VOC2007`` and generate ImageSets splits.

    Arguments:
        input_dir: directory holding the labelme *.json files and the
            class-list file ``annotations_file`` (first two lines must be
            '__ignore__' and '_background_')
        output_dir: dataset root; recreated from scratch if it exists
        annotations_file: name of the class-list file inside ``input_dir``
        trainval_percent, train_percent: split ratios forwarded to createTxt
    """
    print(input_dir)
    print(output_dir)
    output_dir = osp.join(output_dir)
    # Start from a clean output tree.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
    os.makedirs(osp.join(output_dir, "VOC2007", "JPEGImages"))
    os.makedirs(osp.join(output_dir, "VOC2007", "Annotations"))
    os.makedirs(osp.join(output_dir, "VOC2007", "ImageSets"))
    os.makedirs(osp.join(output_dir, "VOC2007", "ImageSets", "Main"))
    # os.makedirs(osp.join(output_dir, "VOC2012", "JPEGImages"))
    # os.makedirs(osp.join(output_dir, "VOC2012", "Annotations"))
    # os.makedirs(osp.join(output_dir, "VOC2012", "ImageSets"))
    # os.makedirs(osp.join(output_dir, "VOC2012", "ImageSets", "Main"))
    print("Creating dataset:", output_dir)

    # Build class list / name->id mapping from the annotations file.
    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(
            open(osp.join(input_dir, annotations_file)).readlines()):
        class_id = i - 1  # starts with -1 so '__ignore__' maps to -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        elif class_id == 0:
            assert class_name == "_background_"
        class_names.append(class_name)
    class_names = tuple(class_names)
    print("class_names:", class_names)

    out_class_names_file = osp.join(output_dir, "class_names.txt")
    with open(out_class_names_file, "w") as f:
        f.writelines("\n".join(class_names))
    print("Saved class_names:", out_class_names_file)

    for filename in glob.glob(osp.join(input_dir, "*.json")):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)
        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(output_dir, "VOC2007", "JPEGImages",
                                base + ".jpg")
        out_xml_file = osp.join(output_dir, "VOC2007", "Annotations",
                                base + ".xml")

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)

        maker = lxml.builder.ElementMaker()
        # Emit <depth> only when the image has a channel axis.
        # BUGFIX: the original tested `img.shape.count == 3`, comparing the
        # tuple's bound .count *method* to 3 (always False), so the color
        # branch never ran and <depth> was never written.
        size_children = [
            maker.height(str(img.shape[0])),
            maker.width(str(img.shape[1])),
        ]
        if len(img.shape) == 3:
            size_children.append(maker.depth(str(img.shape[2])))
        xml = maker.annotation(
            maker.folder(),
            maker.filename(base + ".jpg"),
            maker.database(),  # e.g., The VOC2007 Database
            maker.annotation(),  # e.g., Pascal VOC2007
            maker.image(),  # e.g., flickr
            maker.size(*size_children),
            maker.segmented(),
        )

        bboxes = []
        labels = []
        for shape in label_file.shapes:
            if shape["shape_type"] != "rectangle":
                print("Skipping shape: label={label}, "
                      "shape_type={shape_type}".format(**shape))
                continue

            class_name = shape["label"]
            # Guard: labels not in the class list (e.g. '__ignore__') would
            # otherwise raise ValueError in .index(); skip them like the
            # other converters in this file do.
            if class_name not in class_names:
                print("Skipping shape with unknown label:", class_name)
                continue
            class_id = class_names.index(class_name)

            (xmin, ymin), (xmax, ymax) = shape["points"]

            # swap if min is larger than max.
            xmin, xmax = sorted([xmin, xmax])
            ymin, ymax = sorted([ymin, ymax])

            bboxes.append([ymin, xmin, ymax, xmax])
            labels.append(class_id)

            xml.append(
                maker.object(
                    maker.name(shape["label"]),
                    maker.pose(),
                    maker.truncated(),
                    maker.difficult(),
                    maker.bndbox(
                        maker.xmin(str(xmin)),
                        maker.ymin(str(ymin)),
                        maker.xmax(str(xmax)),
                        maker.ymax(str(ymax)),
                    ),
                ))

        with open(out_xml_file, "wb") as f:
            f.write(lxml.etree.tostring(xml, pretty_print=True))
    createTxt(trainval_percent, train_percent, output_dir)