def annonation_to_text(json_filename, lang):
    """Convert a LabelMe annotation file to decoded text.

    :param json_filename: path to a LabelMe .json annotation file
    :param lang: language code forwarded to postprocess.boxes_to_lines
    :return: text produced by postprocess.lines_to_text from the annotated boxes
    """
    annotated_rects = data.read_LabelMe_annotation(label_filename=json_filename,
                                                   get_points=False)
    # Each rect is (x1, y1, x2, y2, label): split coordinates from labels.
    coords = [rect[:4] for rect in annotated_rects]
    char_labels = [rect[4] for rect in annotated_rects]
    text_lines = postprocess.boxes_to_lines(coords, char_labels, lang=lang)
    return postprocess.lines_to_text(text_lines)
def prepare_data(datasets=datasets):
    """
    data (datasets defined above as global) -> dict:
    key - list of dict ("image_fn": full image filename, "gt_text": groundtruth pseudotext,
    "gt_rects": groundtruth rects + label 0..64)
    :return:
    """
    res_dict = dict()
    for key, list_file_names in datasets.items():
        samples = list()
        res_dict[key] = samples
        for list_file_name in list_file_names:
            # Each list file enumerates image paths relative to its own directory.
            list_path = os.path.join(local_config.data_path, list_file_name)
            base_dir = os.path.dirname(list_path)
            with open(list_path, 'r') as lst:
                entries = lst.readlines()
            for entry in entries:
                if entry[-1] == '\n':
                    entry = entry[:-1]
                entry = entry.replace('\\', '/')
                full_fn = os.path.join(base_dir, entry)
                if not os.path.isfile(full_fn):
                    continue
                stem = full_fn.rsplit('.', 1)[0]
                rects = None
                # Annotation lookup order: LabelMe .json, then DSBI .txt,
                # then the two-sided '+recto.jpg' / '+recto.txt' naming.
                lbl_fn = stem + '.json'
                if os.path.isfile(lbl_fn):
                    rects = data.read_LabelMe_annotation(label_filename=lbl_fn,
                                                         get_points=False)
                else:
                    lbl_fn = stem + '.txt'
                    if os.path.isfile(lbl_fn):
                        img = PIL.Image.open(full_fn)
                        rects = dsbi.read_DSBI_annotation(label_filename=lbl_fn,
                                                          width=img.width, height=img.height,
                                                          rect_margin=rect_margin,
                                                          get_points=False)
                    else:
                        full_fn = stem + '+recto.jpg'
                        lbl_fn = stem + '+recto.txt'
                        if os.path.isfile(lbl_fn):
                            img = PIL.Image.open(full_fn)
                            rects = dsbi.read_DSBI_annotation(label_filename=lbl_fn,
                                                              width=img.width, height=img.height,
                                                              rect_margin=rect_margin,
                                                              get_points=False)
                if rects is None:
                    continue
                boxes = [r[:4] for r in rects]
                labels = [r[4] for r in rects]
                lines = postprocess.boxes_to_lines(boxes, labels, lang=lang)
                gt_text = lines_to_pseudotext(lines)
                samples.append({
                    "image_fn": full_fn,
                    "gt_text": gt_text,
                    "gt_rects": rects,
                })
    return res_dict
def run_impl(self, img, lang, draw_refined, find_orientation, process_2_sides, align, draw, gt_rects=None):
    """Run Braille detection on a single image and postprocess the results.

    :param img: input image (PIL image or anything np.asarray accepts)
    :param lang: language code forwarded to postprocess.boxes_to_lines
    :param draw_refined: mode flag forwarded to self.draw_results
    :param find_orientation: if True, also evaluate the image rotated 90 degrees
        and let the model pick the best of the orientation attempts
    :param process_2_sides: if True, also decode the reverse (mirrored) side
    :param align: if True (and not process_2_sides), deskew the image using a
        homography estimated from the detected lines
    :param draw: if True, merge rendered annotation output into the result
    :param gt_rects: optional ground-truth rects, transformed along with the
        image; defaults to an empty list
    :return: dict with keys 'image', 'best_idx', 'err_scores', 'gt_rects',
        'homography' (list or None), plus self.draw_results output when draw
    """
    # Fix: the original used a mutable default argument (gt_rects=[]), which
    # is shared across calls; use the None-sentinel idiom instead.
    if gt_rects is None:
        gt_rects = []
    t = timeit.default_timer()
    np_img = np.asarray(img)
    aug_img, aug_gt_rects = self.preprocessor.preprocess_and_augment(np_img, gt_rects)
    aug_img = data.unify_shape(aug_img)
    input_tensor = self.preprocessor.to_normalized_tensor(aug_img, device=self.impl.device)
    # Placeholder tensor; replaced with the real rotated input only when
    # orientation search is enabled.
    input_tensor_rotated = torch.tensor(0).to(self.impl.device)
    aug_img_rot = None
    if find_orientation:
        np_img_rot = np.rot90(np_img, 1, (0, 1))
        aug_img_rot = self.preprocessor.preprocess_and_augment(np_img_rot)[0]
        aug_img_rot = data.unify_shape(aug_img_rot)
        input_tensor_rotated = self.preprocessor.to_normalized_tensor(aug_img_rot, device=self.impl.device)
    if self.verbose >= 2:
        print(" run_impl.make_batch", timeit.default_timer() - t)
        t = timeit.default_timer()
    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        boxes, labels, scores, best_idx, err_score, boxes2, labels2, scores2 = self.impl(
            input_tensor, input_tensor_rotated,
            find_orientation=find_orientation,
            process_2_sides=process_2_sides)
    if self.verbose >= 2:
        print(" run_impl.impl", timeit.default_timer() - t)
        t = timeit.default_timer()
    #boxes = self.refine_boxes(boxes)
    boxes = boxes.tolist()
    labels = labels.tolist()
    scores = scores.tolist()
    lines = postprocess.boxes_to_lines(boxes, labels, lang=lang)
    self.refine_lines(lines)
    if process_2_sides:
        #boxes2 = self.refine_boxes(boxes2)
        boxes2 = boxes2.tolist()
        labels2 = labels2.tolist()
        scores2 = scores2.tolist()
        lines2 = postprocess.boxes_to_lines(boxes2, labels2, lang=lang)
        self.refine_lines(lines2)
    # Pick the image matching the winning orientation attempt, then undo any
    # remaining 180-degree rotation the model selected.
    aug_img = PIL.Image.fromarray(aug_img if best_idx < OrientationAttempts.ROT90 else aug_img_rot)
    if best_idx in (OrientationAttempts.ROT180, OrientationAttempts.ROT270):
        aug_img = aug_img.transpose(PIL.Image.ROTATE_180)
    if self.verbose >= 2:
        print(" run_impl.postprocess", timeit.default_timer() - t)
    # aug_img.save(Path(results_dir) / 'aug_{}.jpg'.format(align))
    # aug_img.save(Path(results_dir) / 'aug_{}_100.jpg'.format(align), quality = 100)
    t = timeit.default_timer()
    if align and not process_2_sides:
        hom = postprocess.find_transformation(lines, (aug_img.width, aug_img.height))
        if hom is not None:
            # Re-detect lines on the deskewed geometry so downstream output
            # matches the transformed image.
            aug_img = postprocess.transform_image(aug_img, hom)
            boxes = postprocess.transform_rects(boxes, hom)
            lines = postprocess.boxes_to_lines(boxes, labels, lang=lang)
            self.refine_lines(lines)
            aug_gt_rects = postprocess.transform_rects(aug_gt_rects, hom)
        if self.verbose >= 2:
            print(" run_impl.align", timeit.default_timer() - t)
        # aug_img.save(Path(results_dir) / 'aligned_{}.jpg'.format(align))
        # aug_img.save(Path(results_dir) / 'aligned_{}_100.jpg'.format(align), quality = 100)
        t = timeit.default_timer()
    else:
        hom = None
    results_dict = {
        'image': aug_img,
        'best_idx': best_idx,
        'err_scores': list([ten.cpu().data.tolist() for ten in err_score]),
        'gt_rects': aug_gt_rects,
        'homography': hom.tolist() if hom is not None else hom,
    }
    if draw:
        results_dict.update(self.draw_results(aug_img, boxes, lines, labels, scores, False, draw_refined))
        if process_2_sides:
            # The reverse side is decoded mirrored; flip before drawing it.
            aug_img = aug_img.transpose(PIL.Image.FLIP_LEFT_RIGHT)
            results_dict.update(self.draw_results(aug_img, boxes2, lines2, labels2, scores2, True, draw_refined))
        if self.verbose >= 2:
            print(" run_impl.draw", timeit.default_timer() - t)
    return results_dict