def parse_keypoints(content, outdir):
    """Write one keypoint-annotation XML file per image.

    Args:
        content: Parsed annotation dict. The code reads
            ``content['categories'][0]['keypoints']``, ``content['images']``
            (joined on ``'id'``) and ``content['annotations']`` (joined on
            ``'image_id'``).
        outdir: Directory receiving ``<image stem>.xml``. It is not created
            here and must already exist.
    """
    # 1-based keypoint index -> keypoint name, taken from the first category.
    keypoints = dict(enumerate(content['categories'][0]['keypoints'], 1))

    # merge images and annotations: id in images vs image_id in annotations.
    # Materialised as a list because it is traversed twice below; a bare
    # ``map`` iterator would be exhausted by the first loop on Python 3 and
    # the groupby would then see nothing.
    merged_info_list = list(
        map(cytoolz.merge,
            cytoolz.join('id', content['images'],
                         'image_id', content['annotations'])))

    # convert category name to person
    for keypoint in merged_info_list:
        keypoint['category_id'] = "person"

    # group by filename to pool all bbox and keypoint in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        filename = os.path.join(outdir, os.path.splitext(name)[0] + ".xml")
        anno_tree = keypoints2xml_base(groups[0])
        for group in groups:
            anno_tree = keypoints2xml_object(
                group, anno_tree, keypoints, bbox_type="xyxy")
        # Hand the path (not an open text-mode handle) to ``write`` so the
        # serializer opens and closes the file itself; no handle is leaked.
        # NOTE(review): ``pretty_print`` suggests ``etree`` is lxml.etree -- confirm.
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating keypoints xml file {} done!".format(name))
def parse_instance(content, outdir):
    """Write instance (bounding-box) XML annotations, one copy per category folder.

    Args:
        content: Parsed annotation dict with ``categories`` (``id``/``name``),
            ``images`` (joined on ``'id'``) and ``annotations`` (joined on
            ``'image_id'``; each must carry ``iscrowd`` and ``category_id``).
        outdir: Root output directory. Files go to
            ``<outdir>/<category name with spaces as underscores>/<stem>.xml``;
            the category directories are not created here.
    """
    categories = {d['id']: d['name'] for d in content['categories']}

    # merge images and annotations: id in images vs image_id in annotations.
    # A list (not a lazy ``map``) because it is iterated twice below.
    merged_info_list = list(
        map(cytoolz.merge,
            cytoolz.join('id', content['images'],
                         'image_id', content['annotations'])))

    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]

    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        xml_name = os.path.splitext(name)[0] + ".xml"
        # if one file have multiple different objects, save it in each
        # category sub-directory (each distinct path is written only once)
        filenames = []
        for group in groups:
            # NOTE(review): crowd annotations are assumed to be skipped for
            # both the target path and the bbox element -- confirm against
            # the original indentation.
            if group[u'iscrowd'] == 0:
                filename = os.path.join(
                    outdir, re.sub(" ", "_", group['category_id']), xml_name)
                if filename not in filenames:
                    filenames.append(filename)
                anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        # Single-argument call form prints identically on Python 2 and 3.
        print("Formating instance xml file {} done!".format(name))
def extract_urls(args):
    """Export ``<image stem> <coco_url>`` lines from an annotation file.

    Args:
        args: Object with attributes ``output_dir`` (created if missing),
            ``anno_file`` (path to the JSON annotation file) and ``type``.
            When ``type == 'instance'`` one ``<category>_image_urls.txt`` file
            per category is appended to; for any other value a single
            ``person_keypoints_imag_urls.txt`` is (over)written.
    """
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Context manager so the annotation file handle is closed after parsing.
    with open(args.anno_file, 'r') as anno_fp:
        content = json.load(anno_fp)

    # Consumed exactly once (in one of the branches below), so a lazy map is fine.
    merge_info_list = map(
        cytoolz.merge,
        cytoolz.join('id', content['images'],
                     'image_id', content['annotations']))

    if args.type == 'instance':
        outfiles = {
            category['id']: os.path.join(
                args.output_dir,
                re.sub(" ", "_", category['name']) + "_image_urls.txt")
            for category in content['categories']
        }
        for info in merge_info_list:
            # Two spaces reproduce the output of the former two-argument print.
            print("Saving file name:  {}".format(info['file_name']))
            # Append mode: one line is added per merged image/annotation record.
            with open(outfiles[info['category_id']], "a") as f:
                f.write(os.path.splitext(info['file_name'])[0] + " " +
                        info['coco_url'] + "\n")
        print("Exporting coco image urls for instance done!")
    else:
        outfile = os.path.join(args.output_dir,
                               "person_keypoints_imag_urls.txt")
        # Keyed by file name, so each image is listed once.
        url_dict = {
            info['file_name']: info['coco_url']
            for info in merge_info_list
        }
        with open(outfile, "w") as f:
            for name, url in url_dict.items():
                f.write(os.path.splitext(name)[0] + " " + url + "\n")
        print("Exporting coco image urls for keypoints done!")
def parse_instance(content, outdir):
    """Write instance XML annotations for images whose URL marks them 'multiple'.

    The target location is derived from the ``url`` field of the first record
    of each image: the path components at positions -3 and -2 become
    ``<outdir>/<folder>/<subfolder>/``; images whose component at position -4
    is not ``'multiple'`` are skipped.

    Args:
        content: Parsed annotation dict with ``categories``, ``images``
            (joined on ``'id'``, each with ``url`` and ``file_name``) and
            ``annotations`` (joined on ``'image_id'``).
        outdir: Root output directory.
    """
    categories = {d['id']: d['name'] for d in content['categories']}

    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(
        map(cytoolz.merge,
            cytoolz.join('id', content['images'],
                         'image_id', content['annotations'])))

    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]

    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        # Split the URL once instead of three times.
        url_parts = groups[0]['url'].split('/')
        if url_parts[-4] != 'multiple':
            continue
        folder, subfolder = url_parts[-3], url_parts[-2]

        target_dir = os.path.join(outdir, folder, subfolder)
        if not os.path.isdir(target_dir):
            # Creates every missing level in one call.
            os.makedirs(target_dir)

        anno_tree = instance2xml_base(groups[0])
        for group in groups:
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))

        # All records of an image share one target path, so the finished tree
        # is written once rather than once per annotation.
        filename = os.path.join(target_dir, os.path.splitext(name)[0] + ".xml")
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))
def parse_instance(content, outdir):
    """Write XML annotations for target-class objects and copy their images.

    Only annotations whose category name is in the module-level
    ``target_classes`` are kept, and only when the image (read from the
    module-level ``dataDir``) decodes to a 3-dimensional array.

    Args:
        content: Parsed annotation dict with ``categories``, ``images``
            (joined on ``'id'``) and ``annotations`` (joined on ``'image_id'``).
        outdir: Root output directory; XML goes to ``<outdir>/annotations/``.
            Images are copied to ``<output_dir>/images/`` using the
            module-level ``output_dir``.
    """
    categories = {d['id']: d['name'] for d in content['categories']}

    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(
        map(cytoolz.merge,
            cytoolz.join('id', content['images'],
                         'image_id', content['annotations'])))

    # convert category id to name && get target object info
    has_three_dims = {}  # file_name -> bool, so each image is decoded once
    filtered_info_list = []
    for instance in merged_info_list:
        cat_name = categories[instance['category_id']]
        if cat_name not in target_classes:
            continue
        file_name = instance['file_name']
        if file_name not in has_three_dims:
            # Filter out images that are unsuitable for VOC (array is not 3-D).
            # The context manager closes the image file after inspection.
            # NOTE(review): assumes ``Image`` is PIL.Image -- confirm.
            with Image.open(os.path.join(dataDir, file_name)) as origimg:
                has_three_dims[file_name] = (
                    len(np.asarray(origimg).shape) == 3)
        if not has_three_dims[file_name]:
            continue
        instance['category_id'] = cat_name
        filtered_info_list.append(instance)

    # (A per-class cap on the number of images was sketched here but disabled.)

    # group by filename to pool all bbox in same file
    target_images = []
    grouped = cytoolz.groupby('file_name', filtered_info_list)
    for name, groups in grouped.items():
        anno_tree = instance2xml_base(groups[0])
        for group in groups:
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))

        # Every record of an image maps to the same path: write the tree once.
        filename = os.path.join(outdir, 'annotations',
                                os.path.splitext(name)[0] + ".xml")
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))

        # copy target image file to outdir; groupby keys are unique, so each
        # image is reached exactly once and needs no membership check.
        # NOTE(review): this uses the global ``output_dir`` while the XML path
        # uses the ``outdir`` parameter -- confirm they are meant to match.
        img_path = os.path.join(dataDir, name)
        target_dir = os.path.join(output_dir, 'images', name)
        shutil.copyfile(img_path, target_dir)
        target_images.append(name)

    print(len(target_images))
def broadcast_dimensions(argpairs, numblocks, sentinels=(1, (1,)),
                         consolidate=None):
    """ Find block dimensions from arguments

    Parameters
    ----------
    argpairs: iterable
        name, ijk index pairs (pairs whose index is None are ignored)
    numblocks: dict
        maps {name: number of blocks}
    sentinels: iterable (optional)
        values for singleton dimensions; removed from an index's candidate
        set when that index has more than one candidate
    consolidate: func (optional)
        use this to reduce each set of common blocks into a smaller set

    Raises
    ------
    ValueError
        if, without ``consolidate``, some index does not end up with exactly
        one candidate dimension

    Examples
    --------
    >>> argpairs = [('x', 'ij'), ('y', 'ji')]
    >>> numblocks = {'x': (2, 3), 'y': (3, 2)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Supports numpy broadcasting rules

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> numblocks = {'x': (2, 1), 'y': (1, 3)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Works in other contexts too

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> d = {'x': ('Hello', 1), 'y': (1, (2, 3))}
    >>> broadcast_dimensions(argpairs, d)
    {'i': 'Hello', 'j': (2, 3)}
    """
    indexed_args = [(name, index) for name, index in argpairs
                    if index is not None]

    # Pair every indexed argument with its numblocks entry (matched on name),
    # then flatten to a stream like [('i', 2), ('j', 1), ('i', 1), ('j', 2)].
    matched = toolz.join(toolz.first, indexed_args,
                         toolz.first, numblocks.items())
    index_dim_pairs = toolz.concat(
        [zip(inds, dims) for (_, inds), (_, dims) in matched])

    # index letter -> set of every dimension seen for it
    candidates = {
        index: {dim for _, dim in pairs}
        for index, pairs in toolz.groupby(0, index_dim_pairs).items()
    }

    # Drop sentinel values only where an index has competing candidates.
    reduced = {}
    for index, dims in candidates.items():
        reduced[index] = dims - set(sentinels) if len(dims) > 1 else dims

    if consolidate:
        return toolz.valmap(consolidate, reduced)

    if reduced and {len(dims) for dims in reduced.values()} != {1}:
        raise ValueError("Shapes do not align %s" % candidates)

    return toolz.valmap(toolz.first, reduced)
def broadcast_dimensions(argpairs, numblocks, sentinels=(1, (1,)),
                         consolidate=None):
    """ Find block dimensions from arguments

    Parameters
    ----------
    argpairs: iterable
        name, ijk index pairs; a pair with index None is skipped
    numblocks: dict
        maps {name: number of blocks}
    sentinels: iterable (optional)
        values for singleton dimensions, discarded when an index has more
        than one distinct dimension
    consolidate: func (optional)
        use this to reduce each set of common blocks into a smaller set

    Raises
    ------
    ValueError
        when ``consolidate`` is not given and an index is left with zero or
        several dimensions

    Examples
    --------
    >>> argpairs = [('x', 'ij'), ('y', 'ji')]
    >>> numblocks = {'x': (2, 3), 'y': (3, 2)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Supports numpy broadcasting rules

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> numblocks = {'x': (2, 1), 'y': (1, 3)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Works in other contexts too

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> d = {'x': ('Hello', 1), 'y': (1, (2, 3))}
    >>> broadcast_dimensions(argpairs, d)
    {'i': 'Hello', 'j': (2, 3)}
    """
    usable_pairs = [(arg, ind) for arg, ind in argpairs if ind is not None]

    # Inner join of argument/index pairs with numblocks entries on the name.
    joined = toolz.join(toolz.first, usable_pairs,
                        toolz.first, numblocks.items())

    # Flat sequence like [('i', 2), ('j', 1), ('i', 1), ('j', 2)]
    flat_pairs = toolz.concat(
        [zip(inds, dims) for (_, inds), (_, dims) in joined])
    by_index = toolz.groupby(0, flat_pairs)

    # Collect the distinct dimensions observed for every index.
    dims_by_index = {}
    for index, pairs in by_index.items():
        dims_by_index[index] = {dim for _, dim in pairs}

    # Sentinels are removed only when an index has more than one candidate.
    resolved = {
        index: (dims - set(sentinels) if len(dims) > 1 else dims)
        for index, dims in dims_by_index.items()
    }

    if consolidate:
        return toolz.valmap(consolidate, resolved)

    set_sizes = set(map(len, resolved.values()))
    if resolved and set_sizes != {1}:
        raise ValueError("Shapes do not align %s" % dims_by_index)

    return toolz.valmap(toolz.first, resolved)
def parse_instance(content, outdir):
    """Write one instance (bounding-box) XML file per image into ``outdir``.

    Args:
        content: Parsed annotation dict with ``categories`` (``id``/``name``),
            ``images`` (joined on ``'id'``) and ``annotations`` (joined on
            ``'image_id'``).
        outdir: Directory receiving ``<image stem>.xml``; not created here.
    """
    id_to_name = {cat['id']: cat['name'] for cat in content['categories']}

    # Pair each image record with each of its annotations and merge the two
    # dicts of every pair into a single record.
    joined_pairs = cytoolz.join(
        'id', content['images'], 'image_id', content['annotations'])
    records = [cytoolz.merge(pair) for pair in joined_pairs]

    # Replace the numeric category id with its name.
    for record in records:
        record['category_id'] = id_to_name[record['category_id']]

    # Pool every bbox of an image into that image's XML tree.
    per_file = cytoolz.groupby('file_name', records)
    for name, groups in per_file.items():
        anno_tree = instance2xml_base(groups[0])
        for group in groups:
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))

        stem = os.path.splitext(name)[0]
        filename = os.path.join(outdir, stem + ".xml")
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))
def parse_keypoints(content, outdir):
    """Write one keypoint-annotation XML file per image.

    Args:
        content: Parsed annotation dict. The code reads
            ``content['categories'][0]['keypoints']``, ``content['images']``
            (joined on ``'id'``) and ``content['annotations']`` (joined on
            ``'image_id'``).
        outdir: Directory receiving ``<image stem>.xml``; it must already exist.
    """
    # 1-based keypoint index -> keypoint name, taken from the first category.
    keypoint_names = content['categories'][0]['keypoints']
    keypoints = dict(zip(range(1, len(keypoint_names) + 1), keypoint_names))

    # merge images and annotations: id in images vs image_id in annotations.
    # Forced into a list: the records are looped over twice, and on Python 3
    # a bare ``map`` would already be exhausted when groupby runs.
    merged_info_list = list(
        map(cytoolz.merge,
            cytoolz.join('id', content['images'],
                         'image_id', content['annotations'])))

    # convert category name to person
    for keypoint in merged_info_list:
        keypoint['category_id'] = "person"

    # group by filename to pool all bbox and keypoint in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        filename = os.path.join(outdir, os.path.splitext(name)[0] + ".xml")
        anno_tree = keypoints2xml_base(groups[0])
        for group in groups:
            anno_tree = keypoints2xml_object(
                group, anno_tree, keypoints, bbox_type="xyxy")
        doc = etree.ElementTree(anno_tree)
        # Give ``write`` the path instead of an ``open(...)`` handle that was
        # never closed; the serializer then manages the file itself.
        # NOTE(review): ``pretty_print`` suggests ``etree`` is lxml.etree -- confirm.
        doc.write(filename, pretty_print=True)
        # Single-argument call form prints identically on Python 2 and 3.
        print("Formating keypoints xml file {} done!".format(name))
def parse_instance(content, outdir):
    """Write instance (bounding-box) XML annotations, one copy per category folder.

    Args:
        content: Parsed annotation dict with ``categories`` (``id``/``name``),
            ``images`` (joined on ``'id'``) and ``annotations`` (joined on
            ``'image_id'``).
        outdir: Root output directory. Files go to
            ``<outdir>/<category name with spaces as underscores>/<stem>.xml``;
            the category directories are not created here.
    """
    categories = {d['id']: d['name'] for d in content['categories']}

    # merge images and annotations: id in images vs image_id in annotations.
    # A list (not a lazy ``map``) because it is iterated twice below.
    merged_info_list = list(
        map(cytoolz.merge,
            cytoolz.join('id', content['images'],
                         'image_id', content['annotations'])))

    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]

    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        xml_name = os.path.splitext(name)[0] + ".xml"
        # if one file have multiple different objects, save it in each
        # category sub-directory; a path is recorded once even when several
        # objects share a category, so the same file is not rewritten.
        filenames = []
        for group in groups:
            filename = os.path.join(
                outdir, re.sub(" ", "_", group['category_id']), xml_name)
            if filename not in filenames:
                filenames.append(filename)
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        # Single-argument call form prints identically on Python 2 and 3.
        print("Formating instance xml file {} done!".format(name))
def coco2xml_convert(anno_file,
                     output_dir='./result',
                     bbox_type='xywh',
                     folder_split=False):
    """converting COCO format to xml format.

    Args:
        anno_file (str): Path to annotations of data.
        output_dir (str): Path to save folder. Created if missing.
        bbox_type (str): Bbox format, forwarded to ``instance2xml_bbox``.
            Default: 'xywh'.
        folder_split (bool): Whether to store file by category. When True the
            XML of an image is written into the sub-directory of every
            category that occurs in it.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Context manager so the annotation file is closed once parsed.
    with open(anno_file, 'r') as anno_fp:
        annotation = json.load(anno_fp)
    categories = {d['id']: d['name'] for d in annotation['categories']}

    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(
        map(cytoolz.merge,
            cytoolz.join('id', annotation['images'],
                         'image_id', annotation['annotations'])))

    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]

    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        xml_name = os.path.splitext(name)[0] + '.xml'

        filenames = []
        for group in groups:
            if folder_split:
                subdir = os.path.join(output_dir, group['category_id'])
                filename = os.path.join(subdir, xml_name)
                # Register each category folder once per image so the same
                # file is not rewritten for every object of that category.
                if filename not in filenames:
                    os.makedirs(subdir, exist_ok=True)
                    filenames.append(filename)
            anno_tree.append(instance2xml_bbox(group, bbox_type=bbox_type))
        if not folder_split:
            filenames = [os.path.join(output_dir, xml_name)]

        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print(f'Formating instance xml file {name} done!')
def extract_urls(args):
    """Export ``<image stem> <coco_url>`` lines from an annotation file.

    Args:
        args: Object with attributes ``output_dir`` (created if missing),
            ``anno_file`` (path to the JSON annotation file) and ``type``.
            When ``type == 'instance'`` one ``<category>_image_urls.txt`` file
            per category is appended to; for any other value a single
            ``person_keypoints_imag_urls.txt`` is (over)written.
    """
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # ``with`` closes the annotation file as soon as it has been parsed.
    with open(args.anno_file, 'r') as anno_fp:
        content = json.load(anno_fp)

    # Lazy is fine here: only one of the branches below iterates it, once.
    merge_info_list = map(
        cytoolz.merge,
        cytoolz.join('id', content['images'],
                     'image_id', content['annotations']))

    if args.type == 'instance':
        outfiles = {
            category['id']: os.path.join(
                args.output_dir,
                re.sub(" ", "_", category['name']) + "_image_urls.txt")
            for category in content['categories']
        }
        for info in merge_info_list:
            # Two spaces keep the output of the former two-argument print.
            print("Saving file name:  {}".format(info['file_name']))
            # The ``with`` block closes the file; no explicit close is needed.
            with open(outfiles[info['category_id']], "a") as f:
                f.write(os.path.splitext(info['file_name'])[0] + " " +
                        info['coco_url'] + "\n")
        print("Exporting coco image urls for instance done!")
    else:
        outfile = os.path.join(args.output_dir,
                               "person_keypoints_imag_urls.txt")
        # Dict keyed by file name collapses repeated images to one entry.
        url_dict = {
            info['file_name']: info['coco_url']
            for info in merge_info_list
        }
        with open(outfile, "w") as f:
            for name, url in url_dict.items():
                f.write(os.path.splitext(name)[0] + " " + url + "\n")
        print("Exporting coco image urls for keypoints done!")
def parse_instance(content, outdir):
    """
    Version that does not create replicate files for each category.

    Every image gets exactly one ``<outdir>/<image stem>.xml`` holding all of
    its bounding boxes.

    Args:
        content: Parsed annotation dict with ``categories`` (``id``/``name``),
            ``images`` (joined on ``'id'``) and ``annotations`` (joined on
            ``'image_id'``).
        outdir: Directory receiving the XML files; not created here.
    """
    name_by_id = {entry['id']: entry['name'] for entry in content['categories']}

    # Join images with their annotations and merge each matched pair.
    pairs = cytoolz.join(
        'id', content['images'], 'image_id', content['annotations'])
    merged = [cytoolz.merge(pair) for pair in pairs]

    # Swap numeric category ids for category names.
    for record in merged:
        record['category_id'] = name_by_id[record['category_id']]

    # One XML tree per image file, pooling all of its bounding boxes.
    for name, groups in cytoolz.groupby('file_name', merged).items():
        anno_tree = instance2xml_base(groups[0])
        stem, _ = os.path.splitext(name)
        filename = os.path.join(outdir, stem + ".xml")
        for group in groups:
            bbox_node = instance2xml_bbox(group, bbox_type='xyxy')
            anno_tree.append(bbox_node)
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
def left_join2(lseq, rseq, key):
    """Join two sequences of dicts on ``key`` and return the merged rows sorted by it.

    Args:
        lseq: Left sequence of dicts; rows without a match in ``rseq`` are
            still emitted (``right_default={}`` merges them with an empty dict).
        rseq: Right sequence of dicts; on duplicate field names its values win.
        key: Iterable of field names, unpacked into ``operator.itemgetter``.

    Returns:
        list: One merged dict per joined pair, ordered by the key fields.
    """
    key_fn = operator.itemgetter(*key)
    joined_pairs = cytoolz.join(key_fn, lseq, key_fn, rseq, right_default={})
    merged_rows = [dict(left, **right) for left, right in joined_pairs]
    merged_rows.sort(key=key_fn)
    return merged_rows
def binop(total, acc):
    """Add the value at position 2 of record ``acc`` to the running ``total``."""
    return total + acc[2]


# The initial accumulator 0 must be passed to ``reduceby`` itself (not to
# ``print``); without it each group would be seeded with its first record
# instead of a number.
print(reduceby(get(3), binop, accounts, 0))
# {'M': 400, 'F': 400}

# SEMI-STREAMING `JOIN`
# We register multiple datasets together with `join`.
# Consider a second dataset storing addresses by ID
addresses = [
    (1, '123 Main Street'),  # id, address
    (2, '5 Adams Way'),
    (5, '34 Rue St Michel'),
]

# SELECT accounts.name, addresses.address FROM accounts, addresses
# WHERE accounts.id = addresses.id;
result = join(first, accounts, first, addresses)
# Distinct names for the two ids avoid shadowing the ``id`` builtin; both
# sides were matched on their first field by the join above.
for (acct_id, name, bal, gender), (addr_id, address) in result:
    print((acct_id, name, bal, gender), (addr_id, address))
    print((name, address))

# toolz.itertoolz.join(leftkey, leftseq, rightkey, rightseq,
#                      left_default='__no__default__',
#                      right_default='__no__default__')
# This is a semi-streaming operation. The LEFT sequence is fully evaluated
# and placed into memory. The RIGHT sequence is evaluated lazily and so
# can be arbitrarily large.


# `JOIN` ON ARBITRARY FUNCTIONS / DATA
def isodd(x):
    """Return True when ``x`` leaves remainder 1 modulo 2."""
    return x % 2 == 1
'year': 1, 'acct': 6, 'y': 'y6' }, { 'year': 1, 'acct': 7, 'y': 'y7' }]
# NOTE(review): the line above is the tail of a list literal that begins
# outside this view; it is left untouched.

# Rows are matched on the pair of fields ('year', 'acct').
key = ('year', 'acct')
key_fn = operator.itemgetter(*key)

# Several formulations of joining ``lx`` and ``ly`` on ``key_fn``; each builds
# merged dicts from an ``lx`` row and an ``ly`` row.

# 1. Nested generator loops with an explicit key comparison.
list(dict(x, **y) for y in ly for x in lx if key_fn(x) == key_fn(y))

# 2. cytoolz.join does the key matching.
list(dict(x, **y) for x, y in cytoolz.join(key_fn, lx, key_fn, ly))

# 3. Cartesian product filtered on equal keys.
list(
    dict(x, **y) for x, y in itertools.product(lx, ly)
    if key_fn(x) == key_fn(y))

# 4. Group the concatenated rows by key and merge the first two rows of every
#    group that has more than one row (further rows in a group are ignored).
list(
    dict(_[0], **_[1]) for _ in cytoolz.groupby(key_fn, lx + ly).values()
    if len(_) > 1)

# 5. Same nested loops as (1), merging with cytoolz.merge in a list comprehension.
[cytoolz.merge(x, y) for x in lx for y in ly if key_fn(x) == key_fn(y)]

# OUTPUT:
# [{'acct': 3, 'x': 'x3', 'y': 'y3', 'year': 1},
#  {'acct': 4, 'x': 'x4', 'y': 'y4', 'year': 1},
#  {'acct': 5, 'x': 'x5', 'y': 'y5', 'year': 1}]
def parse_instance(content, outdir, download_images=False):
    """Write one instance XML per image and optionally download the images.

    Args:
        content: Parsed annotation dict with ``categories``, ``images``
            (``id``, ``file_name``, ``coco_url``) and ``annotations``
            (``image_id``, ``category_id``). It is modified in place:
            ``image_id`` values are cast to ``int`` and, when downloading,
            ``file_name`` is replaced by the local ``.jpg`` file name.
        outdir: Root output directory; XML goes to ``<outdir>/annotations/``
            and downloaded images to ``<outdir>/images/``.
        download_images: When True, fetch every image from its ``coco_url``.

    Raises:
        AssertionError: If an annotation lacks ``category_id`` or an image
            file name still looks like an http(s) URL.
    """
    categories = {d['id']: d['name'] for d in content['categories']}

    # make sure image_id is of type int (and not of type string), otherwise
    # the join against the image ``id`` below would not match
    for annotation in content['annotations']:
        annotation['image_id'] = int(annotation['image_id'])

    # save all annotation .xml files into same sub-directory
    anno_dir = os.path.join(outdir, "annotations")
    os.makedirs(anno_dir, exist_ok=True)

    # download images
    if download_images:
        im_dir = os.path.join(outdir, "images")
        os.makedirs(im_dir, exist_ok=True)
        num_images = len(content['images'])
        for index, obj in enumerate(content['images']):
            print(
                f"Downloading image {index} of {num_images} from: {obj['coco_url']}"
            )

            # Update 'filename' field to be a (local) filename and not a url
            im_local_filename = os.path.splitext(
                os.path.basename(obj['file_name']))[0] + ".jpg"
            obj['file_name'] = im_local_filename

            # download image
            dst_path = os.path.join(im_dir, im_local_filename)
            urllib.request.urlretrieve(obj['coco_url'], dst_path)

    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(
        map(cytoolz.merge,
            cytoolz.join('id', content['images'],
                         'image_id', content['annotations'])))

    # convert category id to name
    for instance in merged_info_list:
        assert 'category_id' in instance, f"WARNING: annotation error: image {instance['file_name']} has a rectangle without a 'category_id' field."
        instance['category_id'] = categories[instance['category_id']]

    # group by filename to pool all bbox in same file
    names_groups = cytoolz.groupby('file_name', merged_info_list).items()
    for index, (name, groups) in enumerate(names_groups):
        print(
            f"Converting annotations for image {index} of {len(names_groups)}: {name}"
        )
        # A URL can only survive to this point when download_images is False
        # (downloading rewrites file_name to a local name), so the remedy the
        # message must point to is enabling the download.
        assert not name.lower().startswith(
            ("http:", "https:")
        ), "Image seems to be a url rather than local. Need to set 'download_images' = True"

        anno_tree = instance2xml_base(groups[0], download_images)
        for group in groups:
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))

        # save all annotations in single folder, rather than separate folders
        # for each object; the path is the same for every record of an image,
        # so the finished tree is written once.
        filename = os.path.join(anno_dir, os.path.splitext(name)[0] + ".xml")
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
def join(self, rightseq, leftkey, rightkey):
    """Join this object (as the left sequence) with ``rightseq``.

    Args:
        rightseq: Right-hand sequence.
        leftkey: Key (callable or index/field) applied to items of ``self``.
        rightkey: Key (callable or index/field) applied to items of ``rightseq``.

    Returns:
        A new instance of this object's class wrapping the joined pairs.
    """
    joined_pairs = cytoolz.join(leftkey, self, rightkey, rightseq)
    wrapper = self.__class__
    return wrapper(joined_pairs)
def keyjoin(leftkey, leftseq, rightkey, rightseq):
    """Join two sequences and merge each matched (left, right) pair.

    Args:
        leftkey: Key applied to items of ``leftseq``.
        leftseq: Left sequence.
        rightkey: Key applied to items of ``rightseq``.
        rightseq: Right sequence.

    Returns:
        An iterator yielding ``merge(left, right)`` for every joined pair.
    """
    matched_pairs = join(leftkey, leftseq, rightkey, rightseq)
    return starmap(merge, matched_pairs)
def key_join(left_key, left_seq, right_key, right_seq):
    """Join two sequences on their keys and merge every matched pair.

    Args:
        left_key: Key applied to items of ``left_seq``.
        left_seq: Left sequence.
        right_key: Key applied to items of ``right_seq``.
        right_seq: Right sequence.

    Returns:
        An iterator yielding ``merge(left, right)`` for each pair produced
        by ``join``.
    """
    pairs = join(left_key, left_seq, right_key, right_seq)
    return starmap(merge, pairs)