Exemple #1
0
def test_list_to_file():
    """Round-trip check of ``list_to_file`` for ``.txt`` and ``.jsonl`` files.

    For every fixture in the module-level ``lists``, the items are written
    to ``<tmpdir>/<i>.txt`` and read back; the lines read must match the
    ``str()`` form of the items written, in count and content.

    For every fixture in the module-level ``dicts``, each record is dumped
    with ``json.dumps`` and written to ``<tmpdir>/<i>.jsonl``. Only the
    ``'text'`` value of the first record is compared after reading back.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            list_to_file(filename, lines)
            # Use a context manager so the handle is closed deterministically
            # (an unclosed handle can block temp-dir cleanup on some
            # platforms and triggers ResourceWarning).
            with open(filename, 'r', encoding='utf-8') as f:
                lines2 = [line.rstrip('\r\n') for line in f]
            lines = list(map(str, lines))
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))
        # test jsonl
        for i, lines in enumerate(dicts):
            filename = f'{tmpdirname}/{i}.jsonl'
            list_to_file(filename, [json.dumps(line) for line in lines])
            with open(filename, 'r', encoding='utf-8') as f:
                # Keep only the 'text' field of the first record read back.
                lines2 = [
                    json.loads(line.rstrip('\r\n'))['text'] for line in f
                ][0]

            # Compared element by element against the original first record.
            lines = list(lines[0]['text'])
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))
def process(closeset_file, openset_file, merge_bg_others=False, n_proc=10):
    """Convert each line of a closeset file and write the converted lines.

    Args:
        closeset_file: Input path, read with ``list_from_file``.
        openset_file: Output path, written with ``list_to_file``.
        merge_bg_others: Forwarded to ``convert`` as the keyword argument
            ``merge_bg_others``.
        n_proc: Passed as ``nproc`` to ``mmcv.track_parallel_progress``.
    """
    # Bind the option once so the parallel runner only sees a 1-arg callable.
    line_converter = partial(convert, merge_bg_others=merge_bg_others)
    source_lines = list_from_file(closeset_file)

    converted_lines = mmcv.track_parallel_progress(
        line_converter, source_lines, nproc=n_proc)
    list_to_file(openset_file, converted_lines)
Exemple #3
0
def save_2darray(mat, file_name):
    """Write a 2d array to a txt file, one comma-separated row per line.

    Args:
        mat (ndarray): 2d-array of shape (n, m).
        file_name (str): The output file name.
    """
    text_rows = []
    for row in mat:
        # Every element is rendered with str() and joined by commas.
        text_rows.append(','.join(map(str, row)))
    list_to_file(file_name, text_rows)
def gen_line_dict_file(out_path, imgid2imgname, imgid2anno):
    """Write one JSON line per annotated image to ``out_path``.

    Args:
        out_path: Output file path handed to ``list_to_file``.
        imgid2imgname (dict): Maps an image id to a mapping that provides
            ``'file_name'``, ``'height'`` and ``'width'``.
        imgid2anno (dict): Maps an image id to its annotations. Images
            whose id is absent from this mapping are skipped.
    """
    json_lines = []
    for img_id, img_info in imgid2imgname.items():
        if img_id not in imgid2anno:
            continue
        record = {
            'file_name': img_info['file_name'],
            'height': img_info['height'],
            'width': img_info['width'],
            'annotations': imgid2anno[img_id],
        }
        json_lines.append(json.dumps(record))
    list_to_file(out_path, json_lines)
Exemple #5
0
def test_list_to_file():
    """Round-trip check of ``list_to_file`` for plain-text files.

    For every fixture in the module-level ``lists``, the items are written
    to ``<tmpdir>/<i>.txt`` and read back; the lines read must match the
    ``str()`` form of the items written, in count and content.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            list_to_file(filename, lines)
            # Use a context manager so the handle is closed deterministically
            # (an unclosed handle can block temp-dir cleanup on some
            # platforms and triggers ResourceWarning).
            with open(filename, 'r', encoding='utf-8') as f:
                lines2 = [line.rstrip('\r\n') for line in f]
            lines = list(map(str, lines))
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))
Exemple #6
0
def _create_dummy_ann_file(ann_file):
    """Write a single-line dummy annotation file to ``ann_file``.

    The line is the JSON dump (non-ASCII characters kept as-is) of one
    sample holding a ``'text'`` string and a ``'label'`` mapping with
    ``'address'`` and ``'name'`` entries, each mapping a substring to a
    list of index pairs.
    """
    label = {
        'address': {'台湾': [[15, 16]]},
        'name': {'彭小军': [[0, 2]]},
    }
    data = {
        'text': '彭小军认为,国内银行现在走的是台湾的发卡模式',
        'label': label,
    }

    serialized = json.dumps(data, ensure_ascii=False)
    list_to_file(ann_file, [serialized])
Exemple #7
0
def gene_sdmgr_model_dataloader(cfg, dirname, curr_dir, empty_img=False):
    """Build a model and a test data loader on top of a one-sample dataset.

    A single-line annotation file ``test.txt`` is written into ``dirname``
    and, unless ``empty_img`` is set, a 348x348x3 all-ones uint8 image is
    saved there as ``1.jpg``. ``cfg`` is modified in place: ``data.test`` is
    replaced by a copy pointing at the generated files,
    ``data.workers_per_gpu`` is set to 1 and ``model.class_list`` is set.

    Args:
        cfg: Config object exposing ``data`` and ``model`` sections.
        dirname: Directory receiving the annotation file and the image.
        curr_dir: Base directory containing ``data/kie_toy_dataset``.
        empty_img (bool): When True, no image file is written.

    Returns:
        tuple: ``(model, data_loader)`` as produced by ``build_model`` and
        ``build_dataloader``.
    """
    annotation = {
        'box': [114.0, 19.0, 230.0, 19.0, 230.0, 1.0, 114.0, 1.0],
        'text': 'CHOEUN',
        'label': 1,
    }
    json_obj = {
        'file_name': '1.jpg',
        'height': 348,
        'width': 348,
        'annotations': [annotation],
    }
    ann_file = osp.join(dirname, 'test.txt')
    list_to_file(ann_file, [json.dumps(json_obj, ensure_ascii=False)])

    if not empty_img:
        dummy_img = np.ones((348, 348, 3), dtype=np.uint8)
        mmcv.imwrite(dummy_img, osp.join(dirname, '1.jpg'))

    # Point a copy of the test config at the generated files, then install
    # it back on cfg before the dataset is built from cfg.data.test.
    test_cfg = copy.deepcopy(cfg.data.test)
    test_cfg.ann_file = ann_file
    test_cfg.img_prefix = dirname
    test_cfg.dict_file = osp.join(curr_dir, 'data/kie_toy_dataset/dict.txt')
    cfg.data.workers_per_gpu = 1
    cfg.data.test = test_cfg
    cfg.model.class_list = osp.join(curr_dir,
                                    'data/kie_toy_dataset/class_list.txt')

    dataset = build_dataset(cfg.data.test)

    # Loader options present on cfg.data, then fixed test-time options, then
    # any explicit 'test_dataloader' overrides (later entries win).
    shared_keys = ('workers_per_gpu', 'samples_per_gpu')
    loader_cfg = {key: cfg.data[key] for key in shared_keys if key in cfg.data}
    test_loader_cfg = {
        **loader_cfg,
        'shuffle': False,
        'drop_last': False,
        **cfg.data.get('test_dataloader', {}),
    }

    data_loader = build_dataloader(dataset, **test_loader_cfg)
    model = build_model(cfg)

    return model, data_loader
Exemple #8
0
def save_results(result, out_dir, img_name, score_thr=0.3):
    """Save detected boundaries (quadrangle or polygon) of one image to a
    txt file.

    Each entry of ``result['boundary_result']`` whose last element exceeds
    ``score_thr`` becomes one output line: all of its values, rounded with
    ``round`` and joined by commas.

    Args:
        result (dict): Text Detection result for one image.
        out_dir (str): Dir of txt files to save detected results.
        img_name (str): Image file name.
        score_thr (float, optional): Score threshold to filter bboxes.
            Must lie strictly between 0 and 1.
    """
    assert 'boundary_result' in result
    assert 0 < score_thr < 1

    txt_file = gen_target_path(out_dir, img_name, '.txt')
    lines = []
    for boundary in result['boundary_result']:
        # The last element of each entry is compared against the threshold.
        if boundary[-1] > score_thr:
            lines.append(','.join(str(round(value)) for value in boundary))
    list_to_file(txt_file, lines)
Exemple #9
0
def _create_dummy_ann_file(ann_file):
    """Write a one-line dummy annotation file and return its content.

    Args:
        ann_file: Output path handed to ``list_to_file``.

    Returns:
        dict: The annotation record that was JSON-dumped into the file. It
        describes image ``1.png`` (200x200) with two annotated text boxes.
    """
    annotations = [
        {
            'text': 'store',
            'box': [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0],
            'label': 1,
            'edge': 1,
        },
        {
            'text': 'MyFamily',
            'box': [23.0, 2.0, 31.0, 1.0, 24.0, 11.0, 16.0, 11.0],
            'label': 2,
            'edge': 1,
        },
    ]
    ann_info1 = {
        'file_name': '1.png',
        'height': 200,
        'width': 200,
        'annotations': annotations,
    }
    serialized = json.dumps(ann_info1)
    list_to_file(ann_file, [serialized])

    return ann_info1
Exemple #10
0
def process(json_dir,
            img_dir,
            out_dir,
            tasks=None,
            nproc=1,
            recog_format='jsonl',
            warp=False):
    """Parse every labelme ``*.json`` file in a directory and write the
    merged annotation files into ``out_dir``.

    Args:
        json_dir: Directory globbed for ``*.json`` files.
        img_dir: Forwarded to ``parse_labelme_json`` as ``img_dir``.
        out_dir: Output directory; created if it does not exist.
        tasks (list[str], optional): Tasks to generate. Defaults to
            ``['det']``. Recognition outputs are only collected and written
            when ``'recog'`` is in this list.
        nproc (int): Number of worker processes. Values ``<= 1`` run
            sequentially via ``mmcv.track_progress``.
        recog_format (str): Forwarded to ``parse_labelme_json`` and used as
            the file extension of the recognition label files.
        warp (bool): Forwarded to ``parse_labelme_json`` as ``warp_flag``.
            When True (and ``'recog'`` is requested) the warped-crop label
            file is written as well.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if tasks is None:
        tasks = ['det']

    mmcv.mkdir_or_exist(out_dir)

    json_file_list = glob.glob(osp.join(json_dir, '*.json'))

    parse_labelme_json_func = partial(parse_labelme_json,
                                      img_dir=img_dir,
                                      out_dir=out_dir,
                                      tasks=tasks,
                                      recog_format=recog_format,
                                      warp_flag=warp)

    if nproc <= 1:
        total_results = mmcv.track_progress(parse_labelme_json_func,
                                            json_file_list)
    else:
        # keep_order=True so the merged output follows json_file_list order.
        total_results = mmcv.track_parallel_progress(parse_labelme_json_func,
                                                     json_file_list,
                                                     keep_order=True,
                                                     nproc=nproc)

    want_recog = 'recog' in tasks
    total_det_line_json_list = []
    total_recog_crop_line_str = []
    total_recog_warp_line_str = []
    for res in total_results:
        # Each result is indexed as (det lines, recog crop lines,
        # recog warp lines).
        total_det_line_json_list.extend(res[0])
        if want_recog:
            total_recog_crop_line_str.extend(res[1])
            total_recog_warp_line_str.extend(res[2])

    # out_dir was already created above, so no second mkdir is needed.
    det_out_file = osp.join(out_dir, 'instances_training.txt')
    list_to_file(det_out_file, total_det_line_json_list)

    if want_recog:
        recog_out_file_crop = osp.join(out_dir, f'train_label.{recog_format}')
        list_to_file(recog_out_file_crop, total_recog_crop_line_str)
        if warp:
            recog_out_file_warp = osp.join(out_dir,
                                           f'warp_train_label.{recog_format}')
            list_to_file(recog_out_file_warp, total_recog_warp_line_str)
Exemple #11
0
def save_results(img_paths, pred_labels, gt_labels, res_dir):
    """Save predicted results to txt files under ``res_dir``.

    Three files are written: ``results.txt`` with one
    ``"<img> <pred> <gt>"`` line per sample, ``correct.txt`` with the lines
    whose prediction equals the ground truth, and ``wrong.txt`` with the
    remaining lines.

    Args:
        img_paths (list[str])
        pred_labels (list[str])
        gt_labels (list[str])
        res_dir (str)
    """
    assert len(img_paths) == len(pred_labels) == len(gt_labels)

    is_correct = []
    lines = []
    for img, pred, gt in zip(img_paths, pred_labels, gt_labels):
        is_correct.append(pred == gt)
        lines.append(f'{img} {pred} {gt}')
    is_wrong = [not flag for flag in is_correct]

    list_to_file(osp.join(res_dir, 'results.txt'), lines)
    # compress() lazily keeps the lines whose mask entry is truthy.
    for file_name, mask in (('correct.txt', is_correct),
                            ('wrong.txt', is_wrong)):
        list_to_file(osp.join(res_dir, file_name), compress(lines, mask))
Exemple #12
0
def _create_dummy_vocab_file(vocab_file):
    """Write dummy vocabulary entries for the letters ``a`` to ``z``.

    For each letter, the JSON dump of ``letter + '\\n'`` is passed to
    ``list_to_file`` as a single-item list targeting ``vocab_file``.
    """
    # NOTE(review): list_to_file is called once per letter on the same path;
    # if it truncates the file on each call only the last entry survives.
    # Confirm this is what the consuming test expects.
    first_code, last_code = ord('a'), ord('z')
    for code_point in range(first_code, last_code + 1):
        entry = json.dumps(chr(code_point) + '\n', ensure_ascii=False)
        list_to_file(vocab_file, [entry])
Exemple #13
0
def _create_dummy_dict_file(dict_file):
    """Write a dummy dict file holding the characters of ``'0123'``.

    The characters are passed to ``list_to_file`` as separate list items.
    """
    characters = [*'0123']
    list_to_file(dict_file, characters)