def split_dataset(self, val_size=0.1, to_file=None):
    """Split the dataset's images into a train and a validation subset.

    The image dict is partitioned by ``cvtools.split_dict`` and each subset
    receives the annotations belonging to its own images. Both subsets
    reference the same ``info`` and ``categories`` objects as
    ``self.coco_dataset`` (they are not copied).

    Args:
        val_size: Forwarded to ``cvtools.split_dict`` (presumably the
            fraction of images held out for validation -- confirm there).
        to_file: Optional path. When truthy, the subsets are saved in the
            same directory as ``train_<basename>`` and ``val_<basename>``.

    Returns:
        tuple: ``(val, train)`` -- the validation subset comes first.
    """
    train_imgs, val_imgs = cvtools.split_dict(self.COCO.imgs, val_size)
    print('images: {} train, {} test.'.format(len(train_imgs),
                                              len(val_imgs)))

    def build_subset(imgs):
        # Assemble one COCO-style dict for the given {image_id: image} map.
        subset = dict(info=self.coco_dataset['info'],
                      categories=self.coco_dataset['categories'])
        subset['images'] = list(imgs.values())
        subset['annotations'] = [
            ann
            for img_id in imgs
            for ann in self.COCO.imgToAnns[img_id]
        ]
        return subset

    train = build_subset(train_imgs)
    val = build_subset(val_imgs)

    if to_file:
        folder, base_name = osp.split(to_file)
        cvtools.save_json(train, to_file=osp.join(folder, 'train_' + base_name))
        cvtools.save_json(val, to_file=osp.join(folder, 'val_' + base_name))
    return val, train
def save_cat_datasets(self, to_file):
    """Save each per-category dataset in ``self.catToDatasets`` as JSON.

    Args:
        to_file: Format string with one positional placeholder; it is
            filled with the name of the dataset's first category to build
            the output path.
    """
    for cat_dataset in self.catToDatasets:
        cat_name = cat_dataset['categories'][0]['name']
        target = to_file.format(cat_name)
        cvtools.save_json(cat_dataset, to_file=target)
def _cluster_by_cat(self, save_root, name_clusters=('bbox', ), n_clusters=(3,)):
    """Run k-means per category on selected annotation features.

    For every annotation the requested features are collected per category
    name; each feature with enough samples is clustered with
    ``cvtools.k_means_cluster`` and the centers are written to
    ``<save_root>/cluster_<category name>.json``.

    Only the feature names ``'area'`` and ``'w-vs-h'`` (bbox width divided
    by bbox height) are collected here; any other name in ``name_clusters``
    (including the default ``'bbox'``) contributes no samples.

    Args:
        save_root: Directory for the result files; created through
            ``cvtools.makedirs``.
        name_clusters: Feature names to cluster.
        n_clusters: Number of clusters for each feature, aligned
            position-by-position with ``name_clusters``.

    Raises:
        AssertionError: If ``name_clusters`` and ``n_clusters`` differ in
            length.
    """
    assert len(name_clusters) == len(n_clusters)
    # Look the cluster count up by feature name. Indexing n_clusters by the
    # position of a feature among the *collected* features picks the wrong
    # count whenever some requested feature is not collected.
    n_by_name = dict(zip(name_clusters, n_clusters))

    samples = defaultdict(lambda: defaultdict(list))
    for ann in self.COCO.anns.values():
        cat_name = self.COCO.cats[ann['category_id']]['name']
        if 'area' in name_clusters:
            samples[cat_name]['area'].append(ann['area'])
        if 'w-vs-h' in name_clusters:
            samples[cat_name]['w-vs-h'].append(
                ann['bbox'][2] / float(ann['bbox'][3]))

    cvtools.makedirs(save_root)
    for cat_name, features in samples.items():
        cluster_results = defaultdict(lambda: defaultdict(list))
        for feature_name, values in features.items():
            k = n_by_name[feature_name]
            # Cluster this feature's own sample list (not the dict holding
            # all features) and skip it when there are fewer samples than
            # requested clusters.
            if len(values) < k:
                continue
            centers = cvtools.k_means_cluster(
                np.array(values).reshape(-1, 1), n_clusters=k)
            cluster_results[feature_name][cat_name].append(
                list(centers.reshape(-1)))
        cvtools.save_json(
            cluster_results,
            osp.join(save_root, 'cluster_{}.json'.format(cat_name)))
def stats_objs_per_cat(self, to_file='objs_per_cat_data.json'):
    """Save, per category, its name and the length of its ``catToImgs`` list.

    The output is a list of ``{'name': ..., 'value': ...}`` records, one per
    key of ``self.coco.catToImgs`` (presumably one list entry per
    annotation of that category -- confirm against the COCO index).

    Args:
        to_file: Path of the JSON file to write.
    """
    cats = self.coco.cats
    records = [
        dict(name=cats[cat_id]['name'], value=len(img_ids))
        for cat_id, img_ids in self.coco.catToImgs.items()
    ]
    cvtools.save_json(records, to_file=to_file)
def crop_with_label(self, save_root='./', iof_th=0.5):
    """Crop every dataset image and collect statistics on the kept objects.

    Each image is read, cropped with ``self.crop`` and its annotations are
    assigned to the crops by ``self.deal_edged_boxes``. Aggregated counters
    are written to ``stats.json`` in the current working directory.

    NOTE: writing the crops/labels themselves is currently disabled (the
    ``save_crop_labeltxt`` call is commented out); only the output
    directories are created.

    Args:
        save_root: Root directory under which ``images`` and
            ``labelTxt+crop`` are created.
        iof_th: Threshold forwarded to ``self.deal_edged_boxes``.
    """
    image_ids = self.COCO.getImgIds()
    image_ids.sort()
    img_infos = self.COCO.loadImgs(image_ids)
    if cvtools._DEBUG:
        # Debug runs only look at the first ten images; slice before the
        # deep copy so the rest is not copied needlessly.
        img_infos = img_infos[:10]
    roidb = copy.deepcopy(img_infos)
    print('{} images.'.format(len(roidb)))

    cvtools.makedirs(save_root + '/images')
    cvtools.makedirs(save_root + '/labelTxt+crop')

    stats = defaultdict(crop_objs=0, total_objs=0,
                        missing_objs=0, total_croped_images=0)
    for entry in tqdm(roidb):
        image_name = entry['file_name']
        if cvtools._DEBUG:
            print('crop {}'.format(image_name))

        # read image
        image_file = osp.join(self.img_prefix, image_name)
        img = cvtools.imread(image_file)
        if img is None:
            print('{} is None.'.format(image_file))
            continue

        # crop image
        crop_imgs, starts = self.crop(img)

        # handling the box at the edge of the cropped image
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        img_to_objs, obj_to_num = self.deal_edged_boxes(
            ann_ids, crop_imgs, starts, iof_th=iof_th)
        if img_to_objs is None:
            continue

        # stats
        stats['crop_objs'] += sum(obj_to_num.values())
        stats['total_objs'] += len(ann_ids)
        # annotations that do not appear in obj_to_num at all
        stats['missing_objs'] += len(set(ann_ids) - set(obj_to_num))
        stats['total_croped_images'] += sum(
            1 for objs in img_to_objs.values() if len(objs) > 0)

        # save results
        # self.save_crop_labeltxt(image_name, img_to_objs, save_root)

    # save stats values
    total_images = len(roidb)
    stats['total_images'] = total_images
    if total_images:
        # NOTE(review): despite the key name, this is the number of
        # non-empty img_to_objs entries per source image -- confirm intent.
        stats['objs_per_croped_image'] = (
            stats['total_croped_images'] / float(total_images))
        stats['objs_per_image'] = stats['total_objs'] / float(total_images)
    else:
        # An empty dataset used to raise ZeroDivisionError here.
        stats['objs_per_croped_image'] = 0.0
        stats['objs_per_image'] = 0.0
    cvtools.save_json(stats, to_file='stats.json')
def stats_objs_per_img(self, to_file='stats_num.json'):
    """Compute per-category averages from ``self.coco.catToImgs`` and save them.

    For each category the length of its ``catToImgs`` list is divided by
    the number of distinct values in that list, keyed by category name.
    The extra key ``'total'`` holds the summed list lengths divided by the
    number of images. The result is printed and written as JSON.

    Args:
        to_file: Path of the JSON file to write.

    Raises:
        AssertionError: If a category's ``catToImgs`` list is empty.
    """
    averages = defaultdict()
    entry_count = 0
    for cat_id, img_ids in self.coco.catToImgs.items():
        distinct_imgs = set(img_ids)
        entry_count += len(img_ids)
        assert len(distinct_imgs) > 0
        name = self.coco.cats[cat_id]['name']
        averages[name] = len(img_ids) / float(len(distinct_imgs))
    averages['total'] = entry_count / float(len(self.coco.imgs))
    print(averages)
    cvtools.save_json(averages, to_file)
def stats_num(self, save='stats_num.json'):
    """Compute per-category averages from ``self.COCO.catToImgs`` and save them.

    Each category name maps to ``len(list) / len(set(list))`` of its
    ``catToImgs`` entry; ``'total'`` is the sum of all list lengths divided
    by the number of images. The mapping is printed and written as JSON.

    Args:
        save: Path of the JSON file to write.

    Raises:
        AssertionError: If a category's ``catToImgs`` list is empty.
    """
    coco = self.COCO
    ratio_by_cat = defaultdict()
    running_total = 0
    for cat_id, img_ids in coco.catToImgs.items():
        unique_imgs = set(img_ids)
        running_total += len(img_ids)
        assert len(unique_imgs) > 0
        ratio = len(img_ids) / float(len(unique_imgs))
        ratio_by_cat[coco.cats[cat_id]['name']] = ratio
    ratio_by_cat['total'] = running_total / float(len(coco.imgs))
    print(ratio_by_cat)
    cvtools.save_json(ratio_by_cat, save)
def crop_for_test(self, save):
    """Crop every image under ``self.img_prefix`` and save the crop rectangles.

    ``self.test_dataset`` is rebuilt as a mapping from image basename to a
    list of ``(x1, y1, x2, y2)`` tuples, one per crop returned by
    ``self.crop``. Images for which ``cvtools.imread`` returns ``None`` are
    reported and skipped.

    Args:
        save: Path of the JSON file the mapping is written to.
    """
    from collections import defaultdict
    image_files = cvtools.get_images_list(self.img_prefix)
    self.test_dataset = defaultdict(list)
    for image_file in tqdm(image_files):
        if cvtools._DEBUG:
            print('crop {}'.format(image_file))
        image_name = osp.basename(image_file)
        img = cvtools.imread(image_file)  # support chinese
        if img is None:
            print('{} is None.'.format(image_file))
            continue
        crop_imgs, starts = self.crop(img)
        for crop_img, start in zip(crop_imgs, starts):
            left, top = start[0], start[1]
            # shape[1] is added to the x offset, shape[0] to the y offset
            right = left + crop_img.shape[1]
            bottom = top + crop_img.shape[0]
            self.test_dataset[image_name].append((left, top, right, bottom))
    cvtools.save_json(self.test_dataset, save)
def crop_in_order_for_test(self, save, w=1920, h=1080, overlap=0.):
    """Crop every image with ``CropInOder`` and save the crop rectangles.

    ``self.test_dataset`` is rebuilt as a mapping from image basename to a
    list of ``(x1, y1, x2, y2)`` tuples, one per crop. Images for which
    ``cvtools.imread`` returns ``None`` are reported and skipped.

    Args:
        save: Path of the JSON file the mapping is written to.
        w: Crop width passed to ``CropInOder``; must be in [800, 1920].
        h: Crop height passed to ``CropInOder``; must be in [800, 1080].
        overlap: Overlap passed to ``CropInOder``; must be in [0, 0.5].

    Raises:
        AssertionError: If ``w``, ``h`` or ``overlap`` is out of range.
    """
    assert 1920 >= w >= 800
    assert 1080 >= h >= 800
    assert 0.5 >= overlap >= 0.
    from collections import defaultdict
    image_files = cvtools.get_images_list(self.img_prefix)
    cropper = CropInOder(width_size=w, height_size=h, overlap=overlap)
    self.test_dataset = defaultdict(list)
    for image_file in tqdm(image_files):
        if cvtools._DEBUG:
            print('crop {}'.format(image_file))
        image_name = osp.basename(image_file)
        img = cvtools.imread(image_file)  # support chinese
        if img is None:
            print('{} is None.'.format(image_file))
            continue
        # The third value returned by the cropper is not used here.
        crop_imgs, starts, _ = cropper(img)
        for crop_img, start in zip(crop_imgs, starts):
            left, top = start[0], start[1]
            rect = (left, top,
                    left + crop_img.shape[1], top + crop_img.shape[0])
            self.test_dataset[image_name].append(rect)
    cvtools.save_json(self.test_dataset, save)
def stats_size_per_cat(self, to_file='size_per_cat_data.json'):
    """Count small / medium / large boxes per category and save the counts.

    The size of a box is its area, ``bbox[2] * bbox[3]``. It is compared
    with the squares of ``self.size_range[0]`` and ``self.size_range[1]``:

    * small:  ``area <= size_range[0] ** 2``
    * medium: ``size_range[0] ** 2 < area <= size_range[1] ** 2``
    * large:  ``area > size_range[1] ** 2``

    As a side effect ``self.cat_size`` is set to a dict mapping category
    name to its list of areas, ordered by ascending number of boxes.

    Args:
        to_file: Path of the JSON file to write. The content is a list of
            ``{'Category', 'small', 'medium', 'large'}`` records.
    """
    self.cat_size = defaultdict(list)
    for dataset in self.catToDatasets:
        cat_name = dataset['categories'][0]['name']
        # Area is width * height. The thresholds below are squared side
        # lengths, so squaring the width alone misclassified every
        # non-square box.
        self.cat_size[cat_name] = [
            ann_info['bbox'][2] * ann_info['bbox'][3]
            for ann_info in dataset['annotations']]
    self.cat_size = dict(
        sorted(self.cat_size.items(), key=lambda item: len(item[1])))

    small_max = pow(self.size_range[0], 2)
    medium_max = pow(self.size_range[1], 2)
    g2_data = []
    for cat_name, sizes in self.cat_size.items():
        data_dict = dict()
        data_dict['Category'] = cat_name
        data_dict['small'] = sum(1 for size in sizes if size <= small_max)
        data_dict['medium'] = sum(
            1 for size in sizes if small_max < size <= medium_max)
        data_dict['large'] = sum(1 for size in sizes if size > medium_max)
        g2_data.append(data_dict)
    cvtools.save_json(g2_data, to_file)
def split_dataset(self, to_file='data.json', val_size=0.1):
    """Split the dataset into train/val subsets and save both as JSON.

    The image dict is partitioned by ``cvtools.split_dict``. A deep copy of
    ``self.coco_dataset`` is then filled with each subset's images and
    their annotations in turn and written to ``train_<basename>`` and
    ``val_<basename>`` in the directory of ``to_file``.

    Args:
        to_file: Path whose directory and basename determine the two
            output files.
        val_size: Forwarded to ``cvtools.split_dict`` (presumably the
            fraction of images held out for validation -- confirm there).
    """
    train_imgs, val_imgs = cvtools.split_dict(self.COCO.imgs, val_size)
    print('images: {} train, {} test.'.format(len(train_imgs),
                                              len(val_imgs)))
    folder, base_name = osp.split(to_file)
    dataset = copy.deepcopy(self.coco_dataset)

    # The same dict is reused for both subsets: fill, save, then overwrite.
    for prefix, subset_imgs in (('train_', train_imgs), ('val_', val_imgs)):
        dataset['images'] = list(subset_imgs.values())
        dataset['annotations'] = [
            ann
            for img_id in subset_imgs
            for ann in self.COCO.imgToAnns[img_id]
        ]
        cvtools.save_json(dataset,
                          to_file=osp.join(folder, prefix + base_name))
def crop_for_test(self, w, h, save=None):
    """Crop every image with ``sliding_crop`` and record the crop rectangles.

    ``self.test_dataset`` is rebuilt as a mapping from image basename to a
    list of ``(x1, y1, x2, y2)`` tuples, one per crop. Images for which
    ``cvtools.imread`` returns ``None`` are reported and skipped. When
    ``cvtools._DEBUG`` is set only the first ten images are processed.

    Args:
        w: Second argument passed to ``sliding_crop`` (presumably the crop
            width -- confirm there).
        h: Third argument passed to ``sliding_crop`` (presumably the crop
            height -- confirm there).
        save: Optional path; when not ``None`` the mapping is also written
            there as JSON.

    Returns:
        The ``self.test_dataset`` mapping.
    """
    image_files = cvtools.get_images_list(self.img_prefix)
    if cvtools._DEBUG:
        image_files = image_files[:10]
    total = len(image_files)
    self.test_dataset = defaultdict(list)
    for index, image_file in enumerate(image_files):
        image_name = osp.basename(image_file)
        img = cvtools.imread(image_file)  # support chinese
        if img is None:
            print('{} is None.'.format(image_file))
            continue
        print('crop image %d of %d: %s' % (index, total, image_name))
        crop_imgs, starts = sliding_crop(img, w, h)
        for crop_img, start in zip(crop_imgs, starts):
            left, top = start[0], start[1]
            rect = (left, top,
                    left + crop_img.shape[1], top + crop_img.shape[0])
            self.test_dataset[image_name].append(rect)
    if save is not None:
        cvtools.save_json(self.test_dataset, save)
    return self.test_dataset