Exemple #1
0
def annonation_to_text(json_filename, lang):
    """Convert a LabelMe annotation file into recognized Braille text.

    :param json_filename: path to the LabelMe ``.json`` annotation file
    :param lang: language code forwarded to line segmentation
    :return: text produced by ``postprocess.lines_to_text``
    """
    rects = data.read_LabelMe_annotation(label_filename=json_filename,
                                         get_points=False)
    boxes, labels = [], []
    for rect in rects:
        boxes.append(rect[:4])
        labels.append(rect[4])
    lines = postprocess.boxes_to_lines(boxes, labels, lang=lang)
    return postprocess.lines_to_text(lines)
Exemple #2
0
def _read_annotation_for_image(full_fn):
    """Locate and load the annotation for image *full_fn*.

    Tries, in order: a LabelMe ``.json`` next to the image, a DSBI ``.txt``
    next to the image, and finally the ``+recto.jpg`` DSBI variant.

    :param full_fn: full path of the image file (known to exist).
    :return: tuple ``(image_fn, rects)``; ``image_fn`` may be rewritten to
        the ``+recto.jpg`` variant, ``rects`` is None when no annotation
        file is found.
    """
    base = full_fn.rsplit('.', 1)[0]
    lbl_fn = base + '.json'
    if os.path.isfile(lbl_fn):
        return full_fn, data.read_LabelMe_annotation(
            label_filename=lbl_fn, get_points=False)
    lbl_fn = base + '.txt'
    if not os.path.isfile(lbl_fn):
        # fall back to the two-sided DSBI naming scheme
        full_fn = base + '+recto.jpg'
        lbl_fn = full_fn.rsplit('.', 1)[0] + '.txt'
        if not os.path.isfile(lbl_fn):
            return full_fn, None
    # DSBI annotations are stored relative to image size, so the image
    # must be opened to get its dimensions.
    img = PIL.Image.open(full_fn)
    return full_fn, dsbi.read_DSBI_annotation(
        label_filename=lbl_fn,
        width=img.width,
        height=img.height,
        rect_margin=rect_margin,  # module-level setting
        get_points=False)


def prepare_data(datasets=datasets):
    """
    Build groundtruth samples from the module-level ``datasets`` mapping.

    Each list file referenced in *datasets* names image files (one per
    line, relative to the list file's directory).  For every image with an
    annotation (LabelMe ``.json``, DSBI ``.txt`` or ``+recto.jpg`` pair),
    a sample dict is produced.

    :param datasets: dict key -> list of list-file names relative to
        ``local_config.data_path``; defaults to the global ``datasets``.
    :return: dict: key -> list of dicts ("image_fn": full image filename,
        "gt_text": groundtruth pseudotext, "gt_rects": groundtruth
        rects + label 0..64).
    """
    res_dict = dict()
    for key, list_file_names in datasets.items():
        data_list = list()
        res_dict[key] = data_list
        for list_file_name in list_file_names:
            list_file = os.path.join(local_config.data_path, list_file_name)
            data_dir = os.path.dirname(list_file)
            with open(list_file, 'r') as f:
                files = f.readlines()
            for fn in files:
                # rstrip is safe on any line, unlike indexing fn[-1];
                # list files may use Windows path separators.
                fn = fn.rstrip('\n').replace('\\', '/')
                full_fn = os.path.join(data_dir, fn)
                if not os.path.isfile(full_fn):
                    continue
                full_fn, rects = _read_annotation_for_image(full_fn)
                if rects is None:
                    continue
                boxes = [r[:4] for r in rects]
                labels = [r[4] for r in rects]
                lines = postprocess.boxes_to_lines(boxes, labels, lang=lang)
                gt_text = lines_to_pseudotext(lines)
                data_list.append({
                    "image_fn": full_fn,
                    "gt_text": gt_text,
                    "gt_rects": rects
                })
    return res_dict
    def run_impl(self, img, lang, draw_refined, find_orientation, process_2_sides, align, draw, gt_rects=None):
        """Run Braille detection on *img* and postprocess the raw output.

        :param img: input page image (PIL image or array-like).
        :param lang: language code used for grouping boxes into lines.
        :param draw_refined: flag forwarded to ``draw_results``.
        :param find_orientation: if True, also evaluate the 90-degree
            rotated image and let the model pick the best orientation.
        :param process_2_sides: if True, also decode the reverse side
            (produces ``boxes2``/``lines2``; disables alignment).
        :param align: if True (and not two-sided), deskew the image with a
            homography estimated from the detected lines.
        :param draw: if True, add rendered result images to the dict.
        :param gt_rects: optional groundtruth rects transformed alongside
            the image; defaults to an empty list.
        :return: dict with keys 'image', 'best_idx', 'err_scores',
            'gt_rects', 'homography', plus ``draw_results`` keys if *draw*.
        """
        # Fixed: the default was a shared mutable list (gt_rects=[]);
        # a None sentinel keeps the same behavior without the aliasing risk.
        if gt_rects is None:
            gt_rects = []
        t = timeit.default_timer()
        np_img = np.asarray(img)
        aug_img, aug_gt_rects = self.preprocessor.preprocess_and_augment(np_img, gt_rects)
        aug_img = data.unify_shape(aug_img)
        input_tensor = self.preprocessor.to_normalized_tensor(aug_img, device=self.impl.device)
        # placeholder tensor so the model call signature is uniform
        input_tensor_rotated = torch.tensor(0).to(self.impl.device)

        aug_img_rot = None
        if find_orientation:
            np_img_rot = np.rot90(np_img, 1, (0,1))
            aug_img_rot = self.preprocessor.preprocess_and_augment(np_img_rot)[0]
            aug_img_rot = data.unify_shape(aug_img_rot)
            input_tensor_rotated = self.preprocessor.to_normalized_tensor(aug_img_rot, device=self.impl.device)

        if self.verbose >= 2:
            print("    run_impl.make_batch", timeit.default_timer() - t)
            t = timeit.default_timer()

        with torch.no_grad():
            boxes, labels, scores, best_idx, err_score, boxes2, labels2, scores2 = self.impl(
                input_tensor, input_tensor_rotated, find_orientation=find_orientation, process_2_sides=process_2_sides)
        if self.verbose >= 2:
            print("    run_impl.impl", timeit.default_timer() - t)
            t = timeit.default_timer()

        #boxes = self.refine_boxes(boxes)
        boxes = boxes.tolist()
        labels = labels.tolist()
        scores = scores.tolist()
        lines = postprocess.boxes_to_lines(boxes, labels, lang=lang)
        self.refine_lines(lines)

        if process_2_sides:
            #boxes2 = self.refine_boxes(boxes2)
            boxes2 = boxes2.tolist()
            labels2 = labels2.tolist()
            scores2 = scores2.tolist()
            lines2 = postprocess.boxes_to_lines(boxes2, labels2, lang=lang)
            self.refine_lines(lines2)

        # keep whichever augmented image matches the chosen orientation
        aug_img = PIL.Image.fromarray(aug_img if best_idx < OrientationAttempts.ROT90 else aug_img_rot)
        if best_idx in (OrientationAttempts.ROT180, OrientationAttempts.ROT270):
            aug_img = aug_img.transpose(PIL.Image.ROTATE_180)

        if self.verbose >= 2:
            print("    run_impl.postprocess", timeit.default_timer() - t)
            # aug_img.save(Path(results_dir) / 'aug_{}.jpg'.format(align))
            # aug_img.save(Path(results_dir) / 'aug_{}_100.jpg'.format(align), quality = 100)
            t = timeit.default_timer()

        if align and not process_2_sides:
            hom = postprocess.find_transformation(lines, (aug_img.width, aug_img.height))
            if hom is not None:
                # deskew image and re-segment lines on the transformed boxes
                aug_img = postprocess.transform_image(aug_img, hom)
                boxes = postprocess.transform_rects(boxes, hom)
                lines = postprocess.boxes_to_lines(boxes, labels, lang=lang)
                self.refine_lines(lines)
                aug_gt_rects = postprocess.transform_rects(aug_gt_rects, hom)
            if self.verbose >= 2:
                print("    run_impl.align", timeit.default_timer() - t)
                # aug_img.save(Path(results_dir) / 'aligned_{}.jpg'.format(align))
                # aug_img.save(Path(results_dir) / 'aligned_{}_100.jpg'.format(align), quality = 100)
                t = timeit.default_timer()
        else:
            hom = None

        results_dict = {
            'image': aug_img,
            'best_idx': best_idx,
            'err_scores': list([ten.cpu().data.tolist() for ten in err_score]),
            'gt_rects': aug_gt_rects,
            'homography': hom.tolist() if hom is not None else hom,
        }

        if draw:
            results_dict.update(self.draw_results(aug_img, boxes, lines, labels, scores, False, draw_refined))
            if process_2_sides:
                # reverse side is mirrored, so flip before drawing
                aug_img = aug_img.transpose(PIL.Image.FLIP_LEFT_RIGHT)
                results_dict.update(self.draw_results(aug_img, boxes2, lines2, labels2, scores2, True, draw_refined))
            if self.verbose >= 2:
                print("    run_impl.draw", timeit.default_timer() - t)

        return results_dict