Example 1
    def details(self):
        """ 返回详细的分析表

        按行罗列所有键,按列罗列所有字典,中间显示各字典键值
        可以把结果保存到excel,然后详细筛选分析

        >>> dc = DictCmper({'d1': {'a': 1, 'b': 2}, 'd2': {'b': 3, 'e': 5}, 'd3': {'d': 4}})
        >>> dc.details()
            d1   d2   d3
        a  1.0  NaN  NaN
        b  2.0  3.0  NaN
        e  NaN  5.0  NaN
        d  NaN  NaN  4.0
        """
        # 1 Collect all keys
        #   A set would not preserve insertion order, so merge the dicts instead.
        #   (The third-party orderedset package would also work, but it is not worth installing for this.)
        keys = DictTool.or_(*self.dicts.values()).keys()

        # 2 Collect every dict's value for each key
        ls = []
        for k in keys:
            ls.append([d.get(k, np.nan) for d in self.dicts.values()])

        # 3 Build the DataFrame
        df = pd.DataFrame.from_records(ls, columns=self.dicts.keys())
        df.index = keys
        return df
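
A minimal usage sketch for the snippet above (assuming numpy and pandas are imported as np and pd, and that DictCmper and DictTool are importable from their module; the Excel file name is illustrative):

dc = DictCmper({'d1': {'a': 1, 'b': 2}, 'd2': {'b': 3, 'e': 5}, 'd3': {'d': 4}})
df = dc.details()             # DataFrame: keys as rows, dict names as columns
df.to_excel('dict_cmp.xlsx')  # export for detailed filtering/analysis in Excel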
Example 2
    def to_labelme_cls(self, root, *, bbox=True, seg=False, info=False):
        """
        :param root: 图片根目录
        :return:
            extdata,存储了一些匹配异常信息
        """
        root, data = Dir(root), {}
        catid2name = {x['id']: x['name'] for x in self.gt_dict['categories']}

        # 1 Preparation: build a file-name lookup for the files under root
        gs = PathGroups.groupby(root.select_files('**/*'))

        # 2 Iterate over the annotations and generate the labelme data
        not_finds = set()  # images present in the COCO data but not found under root
        multimatch = dict()  # COCO images that matched more than one file under root
        for img, anns in tqdm(self.group_gt(reserve_empty=True),
                              disable=not info):
            # 2.1 File matching
            imfiles = gs.find_files(img['file_name'])
            if not imfiles:  # skip images that have no matching file under root
                not_finds.add(img['file_name'])
                continue
            if len(imfiles) > 1:  # remember ambiguous matches, then fall back to the first hit
                multimatch[img['file_name']] = imfiles
            imfile = imfiles[0]

            # 2.2 Convert the annotation content
            lmdict = LabelmeDict.gen_data(imfile)
            img = DictTool.or_(img, {'xltype': 'image'})
            lmdict['shapes'].append(
                LabelmeDict.gen_shape(json.dumps(img, ensure_ascii=False),
                                      [[-10, 0], [-5, 0]]))
            for ann in anns:
                if bbox:
                    ann = DictTool.or_(
                        ann, {'category_name': catid2name[ann['category_id']]})
                    label = json.dumps(ann, ensure_ascii=False)
                    shape = LabelmeDict.gen_shape(label,
                                                  xywh2ltrb(ann['bbox']))
                    lmdict['shapes'].append(shape)

                if seg:
                    # also display the segmentation polygons (in gray)
                    for x in ann['segmentation']:
                        an = {
                            'box_id': ann['id'],
                            'xltype': 'seg',
                            'shape_color': [191, 191, 191]
                        }
                        label = json.dumps(an, ensure_ascii=False)
                        lmdict['shapes'].append(LabelmeDict.gen_shape(
                            label, x))

            f = imfile.with_suffix('.json')

            data[f.relpath(root)] = lmdict

        return LabelmeDataset(root,
                              data,
                              extdata={
                                  'categories': self.gt_dict['categories'],
                                  'not_finds': not_finds,
                                  'multimatch': Groups(multimatch)
                              })
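
A hypothetical usage sketch: the snippet does not show the class that owns to_labelme_cls, so the object below (coco_gt) and the image directory path are assumptions for illustration only:

coco_gt = ...  # some object exposing to_labelme_cls, built around a COCO-style gt_dict
lmds = coco_gt.to_labelme_cls('/data/images', bbox=True, seg=True, info=True)
# lmds.extdata['not_finds']: images referenced in the COCO data but missing under root
# lmds.extdata['multimatch']: images that matched more than one file under root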