Esempio n. 1
0
 def save_cat_datasets(self, to_file):
     """Dump every dataset in ``self.catToDatasets`` to its own JSON file.

     Args:
         to_file: Output path template. The name of each dataset's first
             category is substituted into it with ``str.format``.
     """
     for cat_dataset in self.catToDatasets:
         first_cat_name = cat_dataset['categories'][0]['name']
         out_path = to_file.format(first_cat_name)
         cvtools.dump_json(cat_dataset, to_file=out_path)
Esempio n. 2
0
 def _cluster_by_cat(self,
                     save_root,
                     name_clusters=('bbox', ),
                     n_clusters=(3, )):
     """Cluster per-category annotation statistics and dump one JSON each.

     Only the statistics ``'area'`` and ``'w-vs-h'`` (bbox width divided
     by bbox height) are collected; any other name in ``name_clusters``
     is ignored.

     Args:
         save_root: Directory that receives ``cluster_<category>.json``.
         name_clusters: Names of the statistics to cluster.
         n_clusters: Number of clusters for each statistic, matched to
             ``name_clusters`` by position.

     Raises:
         AssertionError: If the two sequences differ in length.
     """
     assert len(name_clusters) == len(n_clusters)
     # Look the cluster count up by statistic name: the order in which
     # statistics are inserted below need not match ``name_clusters``.
     k_by_name = dict(zip(name_clusters, n_clusters))

     cluster_dict = defaultdict(lambda: defaultdict(list))
     for ann in self.COCO.anns.values():
         cat_name = self.COCO.cats[ann['category_id']]['name']
         if 'area' in name_clusters:
             cluster_dict[cat_name]['area'].append(ann['area'])
         if 'w-vs-h' in name_clusters:
             width, height = ann['bbox'][2], ann['bbox'][3]
             # A zero-height box has no defined aspect ratio; skip it
             # instead of raising ZeroDivisionError.
             if height:
                 cluster_dict[cat_name]['w-vs-h'].append(
                     width / float(height))

     cvtools.makedirs(save_root)
     for cat_name, cluster_values in cluster_dict.items():
         cluster_results = defaultdict(lambda: defaultdict(list))
         for cluster_name, values in cluster_values.items():
             k = k_by_name[cluster_name]
             # Not enough samples to form the requested clusters.
             if len(values) < k:
                 continue
             centers = cvtools.k_means_cluster(
                 np.array(values).reshape(-1, 1),
                 n_clusters=k)
             cluster_results[cluster_name][cat_name].append(
                 list(centers.reshape(-1)))
         cvtools.dump_json(
             cluster_results,
             osp.join(save_root, 'cluster_{}.json'.format(cat_name)))
Esempio n. 3
0
    def split_dataset(self, val_size=0.1, to_file=None):
        """Split the dataset's images into train and validation subsets.

        The images are partitioned with ``cvtools.split_dict``; each subset
        shares the original ``info`` and ``categories`` and carries the
        annotations of its own images.

        Args:
            val_size: Forwarded to ``cvtools.split_dict`` as the split size.
            to_file: Optional output path. When truthy, the subsets are
                dumped next to it as ``train_<name>`` and ``val_<name>``.

        Returns:
            The tuple ``(val, train)`` -- note that validation comes first.
        """
        imgs_train, imgs_val = cvtools.split_dict(self.COCO.imgs, val_size)
        print('images: {} train, {} test.'.format(len(imgs_train), len(imgs_val)))

        def build_subset(imgs):
            # Gather the annotations of every image in this subset.
            annotations = []
            for img_id in imgs.keys():
                annotations += self.COCO.imgToAnns[img_id]
            subset = dict(info=self.coco_dataset['info'],
                          categories=self.coco_dataset['categories'])
            subset['images'] = list(imgs.values())
            # Set unconditionally so an empty subset still has the key.
            subset['annotations'] = annotations
            return subset

        train = build_subset(imgs_train)
        val = build_subset(imgs_val)

        if to_file:
            path, name = osp.split(to_file)
            cvtools.dump_json(train, to_file=osp.join(path, 'train_'+name))
            cvtools.dump_json(val, to_file=osp.join(path, 'val_'+name))
        return val, train
Esempio n. 4
0
 def save(self, crops, to_file, limit_border=False):
     """Build the cropped dataset from ``crops`` and dump it as JSON.

     Take care: ``self.roidb`` and ``crops`` are aligned purely by their
     natural index.

     Args:
         crops: Indexable by image position; each non-empty entry maps a
             crop box to the indexes (into that image's loaded
             annotations) of the annotations inside the crop.
         to_file: Output path handed to ``cvtools.dump_json``.
         limit_border: When true, ``self.recalc_anns`` is applied to each
             crop's annotations before they are transformed.
     """
     assert len(self.roidb) == len(crops)
     cropped_images = []
     cropped_anns = []
     for idx in range(len(self.roidb)):
         src_image = self.roidb[idx]
         src_ann_ids = self.COCO.getAnnIds(imgIds=src_image['id'],
                                           iscrowd=None)
         src_anns = self.COCO.loadAnns(src_ann_ids)
         if not crops[idx]:
             continue
         for img_box, ann_indexes in crops[idx].items():
             # New ids are consecutive and start at 1.
             new_image_id = len(cropped_images) + 1
             image_entry = copy.deepcopy(src_image)
             image_entry['crop'] = img_box
             image_entry['id'] = new_image_id
             cropped_images.append(image_entry)
             # The source annotations must not be modified, because the
             # same annotation may appear in several crops.
             box_anns = copy.deepcopy(
                 [src_anns[index] for index in ann_indexes])
             if limit_border:
                 self.recalc_anns(img_box, box_anns)
             for ann in box_anns:
                 self.trans_ann(ann, img_box)
                 ann['id'] = len(cropped_anns) + 1
                 ann['image_id'] = new_image_id
                 cropped_anns.append(ann)
     self.crop_dataset['images'] = cropped_images
     self.crop_dataset['annotations'] = cropped_anns
     cvtools.dump_json(self.crop_dataset, to_file)
Esempio n. 5
0
 def stats_objs_per_cat(self, to_file='objs_per_cat_data.json'):
     """Dump, per category, its name and the size of its ``catToImgs`` entry.

     Args:
         to_file: Output path for the JSON list of
             ``{'name': ..., 'value': ...}`` records.
     """
     cat_to_imgs = self.coco.catToImgs
     cls_to_num = [
         {
             'name': self.coco.cats[cat_id]['name'],
             'value': len(cat_to_imgs[cat_id]),
         }
         for cat_id in cat_to_imgs
     ]
     cvtools.dump_json(cls_to_num, to_file=to_file)
Esempio n. 6
0
 def stats_objs_per_img(self, to_file='stats_num.json'):
     """Print and dump average ``catToImgs`` entries per image.

     For every category the value is the number of entries in its
     ``catToImgs`` list divided by the number of distinct entries. The
     ``'total'`` value is the entry count over all categories divided by
     the number of images in ``self.coco.imgs``.

     Args:
         to_file: Output path handed to ``cvtools.dump_json``.
     """
     per_cat = defaultdict()
     entry_total = 0
     for cat_id, entries in self.coco.catToImgs.items():
         n_entries = len(entries)
         n_distinct = len(set(entries))
         entry_total += n_entries
         assert n_distinct > 0
         name = self.coco.cats[cat_id]['name']
         per_cat[name] = n_entries / float(n_distinct)
     per_cat['total'] = entry_total / float(len(self.coco.imgs))
     print(per_cat)
     cvtools.dump_json(per_cat, to_file)
Esempio n. 7
0
 def stats_size_per_cat(self, to_file='size_per_cat_data.json'):
     """Count small / medium / large boxes per category and dump as JSON.

     A box's size is its bbox width times height. The thresholds are the
     squares of ``self.size_range[0]`` and ``self.size_range[1]``:
     sizes below the lower threshold are small, sizes above the upper
     threshold are large, and everything in between (both bounds
     included) is medium.

     As a side effect ``self.cat_size`` is set to a dict mapping category
     name to its list of sizes, ordered by ascending list length.

     Args:
         to_file: Output path handed to ``cvtools.dump_json``.
     """
     self.cat_size = defaultdict(list)
     for dataset in self.catToDatasets:
         cat_name = dataset['categories'][0]['name']
         # Width times height; multiplying the width by itself was a typo.
         self.cat_size[cat_name] = [
             ann_info['bbox'][2] * ann_info['bbox'][3]
             for ann_info in dataset['annotations']]
     self.cat_size = dict(
         sorted(self.cat_size.items(), key=lambda item: len(item[1])))

     size_split1 = pow(self.size_range[0], 2)
     size_split2 = pow(self.size_range[1], 2)
     g2_data = []
     for cat_name, sizes in self.cat_size.items():
         g2_data.append({
             'Category': cat_name,
             'small': sum(1 for size in sizes if size < size_split1),
             # Lower bound is inclusive so a size equal to the threshold
             # is not dropped from every bucket.
             'medium': sum(
                 1 for size in sizes if size_split1 <= size <= size_split2),
             'large': sum(1 for size in sizes if size > size_split2),
         })
     cvtools.dump_json(g2_data, to_file)
Esempio n. 8
0
 def save_json(self, to_file):
     """Write ``self.results`` to ``to_file`` with ``cvtools.dump_json``."""
     results = self.results
     cvtools.dump_json(results, to_file)
Esempio n. 9
0
 def save(self, save='merge_coco.json'):
     """Write ``self.merge_coco`` to the path ``save`` with ``cvtools.dump_json``."""
     merged = self.merge_coco
     cvtools.dump_json(merged, save)