def test_list_to_file():
    with tempfile.TemporaryDirectory() as tmpdirname:
        # test txt
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            list_to_file(filename, lines)
            lines2 = [
                line.rstrip('\r\n')
                for line in open(filename, 'r', encoding='utf-8').readlines()
            ]
            lines = list(map(str, lines))
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))

        # test jsonl
        for i, lines in enumerate(dicts):
            filename = f'{tmpdirname}/{i}.jsonl'
            list_to_file(filename, [json.dumps(line) for line in lines])
            lines2 = [
                json.loads(line.rstrip('\r\n'))['text']
                for line in open(filename, 'r', encoding='utf-8').readlines()
            ][0]
            lines = list(lines[0]['text'])
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))

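# For reference, a minimal implementation consistent with the behaviour these
# snippets rely on might look like the sketch below (one item per line,
# UTF-8 encoded). This is an illustrative assumption, not necessarily the
# exact mmocr.utils implementation.
def list_to_file(filename, lines):
    """Write each item of ``lines`` to ``filename``, one item per line."""
    with open(filename, 'w', encoding='utf-8') as f:
        for line in lines:
            f.write(f'{line}\n')


def list_from_file(filename, encoding='utf-8'):
    """Return the lines of ``filename`` with trailing newlines stripped."""
    with open(filename, 'r', encoding=encoding) as f:
        return [line.rstrip('\r\n') for line in f]
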
def process(closeset_file, openset_file, merge_bg_others=False, n_proc=10):
    # Convert each close-set annotation line to the open-set format in
    # parallel, then dump the converted lines to the output file.
    closeset_lines = list_from_file(closeset_file)

    convert_func = partial(convert, merge_bg_others=merge_bg_others)

    openset_lines = mmcv.track_parallel_progress(
        convert_func, closeset_lines, nproc=n_proc)

    list_to_file(openset_file, openset_lines)

def save_2darray(mat, file_name):
    """Save 2d array to txt file.

    Args:
        mat (ndarray): 2d-array of shape (n, m).
        file_name (str): The output file name.
    """
    lines = [','.join([str(x) for x in row]) for row in mat]
    list_to_file(file_name, lines)

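# A quick usage sketch for save_2darray (the file name here is hypothetical):
# each row becomes one comma-separated line.
import numpy as np

mat = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
save_2darray(mat, 'boxes.txt')
# boxes.txt now contains:
# 1.0,2.0,3.0
# 4.0,5.0,6.0
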
def gen_line_dict_file(out_path, imgid2imgname, imgid2anno):
    lines = []
    for key, value in imgid2imgname.items():
        if key in imgid2anno:
            anno = imgid2anno[key]
            line_dict = {}
            line_dict['file_name'] = value['file_name']
            line_dict['height'] = value['height']
            line_dict['width'] = value['width']
            line_dict['annotations'] = anno
            lines.append(json.dumps(line_dict))
    list_to_file(out_path, lines)

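# A hypothetical input for gen_line_dict_file and the resulting JSON line
# (names and numbers are made up for illustration):
imgid2imgname = {0: {'file_name': 'img_0.jpg', 'height': 100, 'width': 200}}
imgid2anno = {0: [{'box': [0, 0, 10, 0, 10, 10, 0, 10], 'text': 'abc'}]}
gen_line_dict_file('train_label.txt', imgid2imgname, imgid2anno)
# train_label.txt then holds one line:
# {"file_name": "img_0.jpg", "height": 100, "width": 200,
#  "annotations": [{"box": [0, 0, 10, 0, 10, 10, 0, 10], "text": "abc"}]}
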
def _create_dummy_ann_file(ann_file):
    data = {
        'text': '彭小军认为,国内银行现在走的是台湾的发卡模式',
        'label': {
            'address': {
                '台湾': [[15, 16]]
            },
            'name': {
                '彭小军': [[0, 2]]
            }
        }
    }

    list_to_file(ann_file, [json.dumps(data, ensure_ascii=False)])

def gene_sdmgr_model_dataloader(cfg, dirname, curr_dir, empty_img=False):
    """Build an SDMGR model and a test dataloader on a one-image toy KIE
    dataset written under ``dirname``."""
    json_obj = {
        'file_name': '1.jpg',
        'height': 348,
        'width': 348,
        'annotations': [{
            'box': [114.0, 19.0, 230.0, 19.0, 230.0, 1.0, 114.0, 1.0],
            'text': 'CHOEUN',
            'label': 1
        }]
    }
    ann_file = osp.join(dirname, 'test.txt')
    list_to_file(ann_file, [json.dumps(json_obj, ensure_ascii=False)])

    if not empty_img:
        img = np.ones((348, 348, 3), dtype=np.uint8)
        img_file = osp.join(dirname, '1.jpg')
        mmcv.imwrite(img, img_file)

    test = copy.deepcopy(cfg.data.test)
    test.ann_file = ann_file
    test.img_prefix = dirname
    test.dict_file = osp.join(curr_dir, 'data/kie_toy_dataset/dict.txt')
    cfg.data.workers_per_gpu = 1
    cfg.data.test = test
    cfg.model.class_list = osp.join(curr_dir,
                                    'data/kie_toy_dataset/class_list.txt')

    dataset = build_dataset(cfg.data.test)

    loader_cfg = {
        **dict((k, cfg.data[k]) for k in [
            'workers_per_gpu', 'samples_per_gpu'
        ] if k in cfg.data)
    }
    test_loader_cfg = {
        **loader_cfg,
        **dict(shuffle=False, drop_last=False),
        **cfg.data.get('test_dataloader', {})
    }

    data_loader = build_dataloader(dataset, **test_loader_cfg)
    model = build_model(cfg)

    return model, data_loader

def save_results(result, out_dir, img_name, score_thr=0.3):
    """Save result of detected bounding boxes (quadrangle or polygon) to txt
    file.

    Args:
        result (dict): Text detection result for one image.
        out_dir (str): Dir of txt files to save detected results.
        img_name (str): Image file name.
        score_thr (float, optional): Score threshold to filter bboxes.
    """
    assert 'boundary_result' in result
    assert score_thr > 0 and score_thr < 1

    txt_file = gen_target_path(out_dir, img_name, '.txt')
    valid_boundary_res = [
        res for res in result['boundary_result'] if res[-1] > score_thr
    ]
    lines = [
        ','.join([str(round(x)) for x in row]) for row in valid_boundary_res
    ]
    list_to_file(txt_file, lines)

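# Each kept boundary row is the polygon points followed by its confidence
# score, so a quadrangle such as [11, 0, 22, 0, 22, 12, 11, 12, 0.9] with
# score_thr=0.3 would be written as the line '11,0,22,0,22,12,11,12,1'
# (note that the score itself is also rounded by the join above).
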
def _create_dummy_ann_file(ann_file):
    ann_info1 = {
        'file_name': '1.png',
        'height': 200,
        'width': 200,
        'annotations': [{
            'text': 'store',
            'box': [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0],
            'label': 1,
            'edge': 1
        }, {
            'text': 'MyFamily',
            'box': [23.0, 2.0, 31.0, 1.0, 24.0, 11.0, 16.0, 11.0],
            'label': 2,
            'edge': 1
        }]
    }
    list_to_file(ann_file, [json.dumps(ann_info1)])

    return ann_info1

def process(json_dir,
            img_dir,
            out_dir,
            tasks=['det'],
            nproc=1,
            recog_format='jsonl',
            warp=False):
    """Convert labelme JSON annotations into detection (and optionally
    recognition) annotation files under ``out_dir``."""
    mmcv.mkdir_or_exist(out_dir)

    json_file_list = glob.glob(osp.join(json_dir, '*.json'))

    parse_labelme_json_func = partial(
        parse_labelme_json,
        img_dir=img_dir,
        out_dir=out_dir,
        tasks=tasks,
        recog_format=recog_format,
        warp_flag=warp)

    if nproc <= 1:
        total_results = mmcv.track_progress(parse_labelme_json_func,
                                            json_file_list)
    else:
        total_results = mmcv.track_parallel_progress(
            parse_labelme_json_func,
            json_file_list,
            keep_order=True,
            nproc=nproc)

    total_det_line_json_list = []
    total_recog_crop_line_str = []
    total_recog_warp_line_str = []
    for res in total_results:
        total_det_line_json_list.extend(res[0])
        if 'recog' in tasks:
            total_recog_crop_line_str.extend(res[1])
            total_recog_warp_line_str.extend(res[2])

    mmcv.mkdir_or_exist(out_dir)
    det_out_file = osp.join(out_dir, 'instances_training.txt')
    list_to_file(det_out_file, total_det_line_json_list)

    if 'recog' in tasks:
        recog_out_file_crop = osp.join(out_dir, f'train_label.{recog_format}')
        list_to_file(recog_out_file_crop, total_recog_crop_line_str)
        if warp:
            recog_out_file_warp = osp.join(out_dir,
                                           f'warp_train_label.{recog_format}')
            list_to_file(recog_out_file_warp, total_recog_warp_line_str)

def save_results(img_paths, pred_labels, gt_labels, res_dir):
    """Save predicted results to txt file.

    Args:
        img_paths (list[str]): Image paths.
        pred_labels (list[str]): Predicted text labels.
        gt_labels (list[str]): Ground-truth text labels.
        res_dir (str): Directory to save the result files.
    """
    assert len(img_paths) == len(pred_labels) == len(gt_labels)
    corrects = [pred == gt for pred, gt in zip(pred_labels, gt_labels)]
    wrongs = [not c for c in corrects]
    lines = [
        f'{img} {pred} {gt}'
        for img, pred, gt in zip(img_paths, pred_labels, gt_labels)
    ]
    list_to_file(osp.join(res_dir, 'results.txt'), lines)
    list_to_file(osp.join(res_dir, 'correct.txt'), compress(lines, corrects))
    list_to_file(osp.join(res_dir, 'wrong.txt'), compress(lines, wrongs))

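# compress is assumed to come from itertools (from itertools import compress);
# it filters `lines` by the boolean masks. A hypothetical call, assuming the
# 'results' directory already exists:
save_results(
    img_paths=['a.jpg', 'b.jpg'],
    pred_labels=['hello', 'world'],
    gt_labels=['hello', 'word'],
    res_dir='results')
# results/results.txt lists both samples, correct.txt keeps 'a.jpg hello hello'
# and wrong.txt keeps 'b.jpg world word'.
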
def _create_dummy_vocab_file(vocab_file):
    chars = list(map(chr, range(ord('a'), ord('z') + 1)))
    # Write all characters in one call; calling list_to_file inside the loop
    # would truncate the file on every iteration and keep only the last char.
    list_to_file(vocab_file,
                 [json.dumps(char + '\n', ensure_ascii=False) for char in chars])

def _create_dummy_dict_file(dict_file):
    dict_str = '0123'
    list_to_file(dict_file, list(dict_str))
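
# Given the one-item-per-line behaviour sketched earlier, the dummy dict file
# would contain four lines: '0', '1', '2' and '3'.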