def save_cat_datasets(self, to_file):
    """Dump each per-category dataset to its own JSON file.

    Args:
        to_file: filename template with one ``{}`` placeholder, filled
            with the category name of each dataset.
    """
    for cat_dataset in self.catToDatasets:
        cat_name = cat_dataset['categories'][0]['name']
        cvtools.dump_json(cat_dataset, to_file=to_file.format(cat_name))
def _cluster_by_cat(self, save_root, name_clusters=('bbox', ), n_clusters=(3, )):
    """Run k-means per category on selected annotation statistics.

    For every category, collects the requested per-annotation values
    ('area' and/or 'w-vs-h'; other names in ``name_clusters`` are ignored
    by the collection step) and writes the cluster centers to
    ``save_root/cluster_<cat_name>.json``.

    Args:
        save_root: directory receiving one JSON file per category.
        name_clusters: statistic names to cluster, aligned element-wise
            with ``n_clusters``.
        n_clusters: number of cluster centers for each statistic.
    """
    assert len(name_clusters) == len(n_clusters)
    # Pair each statistic name with its cluster count up front so the
    # pairing does not depend on dict-iteration position below.
    n_by_name = dict(zip(name_clusters, n_clusters))
    cluster_dict = defaultdict(lambda: defaultdict(list))
    for key, ann in self.COCO.anns.items():
        cat_name = self.COCO.cats[ann['category_id']]['name']
        if 'area' in name_clusters:
            cluster_dict[cat_name]['area'].append(ann['area'])
        if 'w-vs-h' in name_clusters:
            cluster_dict[cat_name]['w-vs-h'].append(
                ann['bbox'][2] / float(ann['bbox'][3]))
    cvtools.makedirs(save_root)
    for cat_name, cluster_values in cluster_dict.items():
        cluster_results = defaultdict(lambda: defaultdict(list))
        for cluster_name, samples in cluster_values.items():
            # BUG FIX: the original tested len() of, and built the numpy
            # array from, the whole per-category dict instead of this
            # statistic's sample list.
            k = n_by_name[cluster_name]
            if len(samples) < k:
                # Cannot form k clusters from fewer than k samples.
                continue
            centers = cvtools.k_means_cluster(
                np.array(samples).reshape(-1, 1), n_clusters=k)
            cluster_results[cluster_name][cat_name].append(
                list(centers.reshape(-1)))
        cvtools.dump_json(
            cluster_results,
            osp.join(save_root, 'cluster_{}.json'.format(cat_name)))
def split_dataset(self, val_size=0.1, to_file=None):
    """Split the dataset's images (and their annotations) into train/val.

    Args:
        val_size: fraction of images placed in the validation split.
        to_file: optional filename; when given, the splits are written as
            ``train_<name>`` and ``val_<name>`` next to it.

    Returns:
        Tuple ``(val, train)`` of COCO-format dicts.
    """
    imgs_train, imgs_val = cvtools.split_dict(self.COCO.imgs, val_size)
    print('images: {} train, {} test.'.format(len(imgs_train), len(imgs_val)))

    def build_subset(img_dict):
        # One COCO-style dict; 'info' and 'categories' are shared with
        # the source dataset, images/annotations come from img_dict.
        subset = dict(info=self.coco_dataset['info'],
                      categories=self.coco_dataset['categories'])
        subset['images'] = list(img_dict.values())
        subset['annotations'] = [
            ann for img_id in img_dict.keys()
            for ann in self.COCO.imgToAnns[img_id]]
        return subset

    train = build_subset(imgs_train)
    val = build_subset(imgs_val)
    if to_file:
        path, name = osp.split(to_file)
        cvtools.dump_json(train, to_file=osp.join(path, 'train_' + name))
        cvtools.dump_json(val, to_file=osp.join(path, 'val_' + name))
    return val, train
def save(self, crops, to_file, limit_border=False):
    """Write the cropped dataset to ``to_file`` in COCO format.

    Be careful: ``crops`` is aligned with ``self.roidb`` purely by
    positional index. (Translated from the original Chinese docstring.)

    Args:
        crops: per-image mapping of crop box -> indexes into that
            image's annotation list; falsy entries are skipped.
        to_file: output JSON path.
        limit_border: when True, annotations are recalculated to the
            crop border via ``self.recalc_anns`` before translation.
    """
    assert len(self.roidb) == len(crops)
    images_out = []
    anns_out = []
    next_image_id = 1
    next_ann_id = 1
    for image_info, crop_map in zip(self.roidb, crops):
        ids = self.COCO.getAnnIds(imgIds=image_info['id'], iscrowd=None)
        anns = self.COCO.loadAnns(ids)
        if not crop_map:
            continue
        for img_box, ann_indexes in crop_map.items():
            cropped_img = copy.deepcopy(image_info)
            cropped_img['crop'] = img_box
            cropped_img['id'] = next_image_id
            images_out.append(cropped_img)
            # Deep-copy: the same source annotation may appear in several
            # crops, so the originals must not be modified. (Translated
            # from the original Chinese comment.)
            cropped_anns = copy.deepcopy([anns[i] for i in ann_indexes])
            if limit_border:
                self.recalc_anns(img_box, cropped_anns)
            for ann in cropped_anns:
                self.trans_ann(ann, img_box)
                ann['id'] = next_ann_id
                ann['image_id'] = next_image_id
                anns_out.append(ann)
                next_ann_id += 1
            next_image_id += 1
    self.crop_dataset['images'] = images_out
    self.crop_dataset['annotations'] = anns_out
    cvtools.dump_json(self.crop_dataset, to_file)
def stats_objs_per_cat(self, to_file='objs_per_cat_data.json'):
    """Dump, for each category, how many annotations reference it.

    Writes a list of ``{'name': category_name, 'value': count}`` items
    to ``to_file``.
    """
    stats = [
        {'name': self.coco.cats[cat_id]['name'], 'value': len(img_ids)}
        for cat_id, img_ids in self.coco.catToImgs.items()
    ]
    cvtools.dump_json(stats, to_file=to_file)
def stats_objs_per_img(self, to_file='stats_num.json'):
    """Compute the average number of annotations per image, per category.

    Writes ``{category_name: anns_per_image, ..., 'total': overall}`` to
    ``to_file`` and prints it.

    Args:
        to_file: output JSON path.
    """
    total_anns = 0
    # Plain dict: every key is assigned explicitly below, so the
    # original's factory-less defaultdict() added nothing but the
    # misleading impression of a default value.
    imgToNum = dict()
    for cat_id, ann_ids in self.coco.catToImgs.items():
        # catToImgs appears to hold one image id per annotation, so
        # duplicates mean several objects of this category in one image
        # — TODO confirm against how catToImgs is built.
        imgs = set(ann_ids)
        total_anns += len(ann_ids)
        assert len(imgs) > 0
        cat_name = self.coco.cats[cat_id]['name']
        imgToNum[cat_name] = len(ann_ids) / float(len(imgs))
    imgToNum['total'] = total_anns / float(len(self.coco.imgs))
    print(imgToNum)
    cvtools.dump_json(imgToNum, to_file)
def stats_size_per_cat(self, to_file='size_per_cat_data.json'):
    """Count small/medium/large objects per category and dump to JSON.

    Object size is the bbox area (width * height). The small/medium/large
    boundaries are the squares of the two side lengths in
    ``self.size_range``.

    Args:
        to_file: output JSON path for the per-category counts.
    """
    self.cat_size = defaultdict(list)
    for cat_id, dataset in enumerate(self.catToDatasets):
        self.cat_size[dataset['categories'][0]['name']] = [
            # BUG FIX: the original multiplied width by itself;
            # area is bbox[2] (w) * bbox[3] (h).
            ann_info['bbox'][2] * ann_info['bbox'][3]
            for ann_info in dataset['annotations']]
    # Order categories by object count, ascending.
    self.cat_size = dict(
        sorted(self.cat_size.items(), key=lambda item: len(item[1])))
    g2_data = []
    size_split1 = pow(self.size_range[0], 2)
    size_split2 = pow(self.size_range[1], 2)
    for cat_name, sizes in self.cat_size.items():
        data_dict = dict()
        data_dict['Category'] = cat_name
        data_dict['small'] = len(
            [size for size in sizes if size < size_split1])
        # Boundary fix: a size exactly equal to size_split1 previously
        # fell into no bucket (small was '<', medium was '>').
        data_dict['medium'] = len(
            [size for size in sizes if size_split1 <= size <= size_split2])
        data_dict['large'] = len(
            [size for size in sizes if size > size_split2])
        g2_data.append(data_dict)
    cvtools.dump_json(g2_data, to_file)
def save_json(self, to_file):
    """Serialize ``self.results`` to ``to_file`` as JSON."""
    cvtools.dump_json(self.results, to_file=to_file)
def save(self, save='merge_coco.json'):
    """Write the merged COCO dataset to ``save`` as JSON."""
    cvtools.dump_json(self.merge_coco, to_file=save)