Exemple #1
0
 def test_starts_and_width_height(self):
     """Start coordinates plus width and height yield both corners."""
     box = BoundingBox(
         start_x=start.x, start_y=start.y, width=width, height=height
     )
     # Both corners are checked against the module-level fixtures.
     self.compare_points(box.start, start)
     self.compare_points(box.end, end)
def get_bounding_box_and_baseline(line_tag):
    """Parse the bounding box and baseline out of a tag's ``title``.

    The ``title`` attribute of ``line_tag`` is split on ``;`` and the
    ``bbox`` and ``baseline`` entries are looked up with ``get_part``.

    Returns:
        A ``(bbox, baseline)`` tuple. ``bbox`` is ``None`` when no
        ``bbox`` entry is found; ``baseline`` is ``None`` when either the
        ``bbox`` or the ``baseline`` entry is missing.
    """
    fields = line_tag.attrib["title"].split(";")

    bbox_field = get_part("bbox", fields)
    if bbox_field is None:
        # A baseline is positioned relative to the box, so without a box
        # there is nothing to compute.
        return None, None

    # Token 0 is skipped; tokens 1-4 carry the four box coordinates.
    coords = bbox_field.split()
    bbox = BoundingBox(
        start_x=int(coords[1]),
        start_y=int(coords[2]),
        end_x=int(coords[3]),
        end_y=int(coords[4]),
    )

    baseline_field = get_part("baseline", fields)
    if baseline_field is None:
        return bbox, None

    # Token 0 is skipped; token 1 is the slope, token 2 the offset.
    tokens = baseline_field.split()
    slope = float(tokens[1])
    offset = int(float(tokens[2]))
    span = bbox.end.x - bbox.start.x

    # The offset is applied to the box's end y; the end point additionally
    # moves by slope * box width and is truncated to an int.
    y_at_start = bbox.end.y + offset
    y_at_end = int(bbox.end.y + offset + span * slope)

    baseline = Baseline(
        start_x=bbox.start.x,
        start_y=y_at_start,
        end_x=bbox.end.x,
        end_y=y_at_end,
    )
    return bbox, baseline
Exemple #3
0
 def test_starts_and_ends(self):
     """Explicit start and end coordinates yield both corners."""
     box = BoundingBox(
         start_x=start.x, start_y=start.y, end_x=end.x, end_y=end.y
     )
     # Both corners are checked against the module-level fixtures.
     self.compare_points(box.start, start)
     self.compare_points(box.end, end)
def process_dataset(dataset, log_path):
    """Build a dataset whose line geometry is mapped through inverted
    per-page transformations.

    For every page ID in ``dataset.pages`` the transformation is read from
    ``<log_path>/<page ID up to its first '.'>.jpg.log``, inverted, and
    applied with ``transform_point`` to each line's bounding-box start, end
    and inner points and to its baseline start and end. Progress is printed
    to stdout on a single, continuously rewritten line.

    Pages whose transformation cannot be inverted are reported on stdout
    and left out of the returned dataset.

    Args:
        dataset: Source dataset exposing ``pages`` and ``get_page``.
        log_path: Directory containing the ``*.jpg.log`` files.

    Returns:
        A new ``Dataset`` holding one page named ``<page_id>.jpg_rec`` for
        every page that was processed.
    """
    new_dataset = Dataset()

    total_pages = len(dataset.pages)

    for index, page_id in enumerate(dataset.pages):
        # "\r" with end="" keeps the progress counter on one line.
        print("\r{current}/{total} ({percentage:.2f} %)".format(
            current=index + 1,
            total=total_pages,
            percentage=100 * float(index + 1) / total_pages),
              end="")

        transformation = read_log(
            os.path.join(log_path,
                         page_id.split(".")[0] + ".jpg.log"))

        try:
            inversion = np.linalg.inv(transformation)
        except (np.linalg.LinAlgError, TypeError, ValueError):
            # Only failures of the inversion itself are handled here, so
            # KeyboardInterrupt / SystemExit still propagate (a bare
            # ``except:`` used to swallow them too).
            print("\nPage ID: {id}".format(id=page_id))
            print("Transformation matrix: {matrix}".format(
                matrix=transformation))
            continue

        page = dataset.get_page(page_id)

        lines = []

        if page.lines is not None:
            for line in page.lines:
                bounding_box = line.bounding_box
                new_bb_start = transform_point(bounding_box.start, inversion)
                new_bb_end = transform_point(bounding_box.end, inversion)

                # Missing inner points become an empty list, not None.
                inner_points = bounding_box.inner_points
                if inner_points is not None:
                    new_bb_inner_points = [
                        transform_point(point, inversion)
                        for point in inner_points
                    ]
                else:
                    new_bb_inner_points = []

                # NOTE(review): assumes every line has a baseline; a None
                # baseline would raise AttributeError here - confirm.
                new_baseline_start = transform_point(line.baseline.start,
                                                     inversion)
                new_baseline_end = transform_point(line.baseline.end,
                                                   inversion)

                lines.append(
                    Line(
                        line.text,
                        BoundingBox(new_bb_start, new_bb_end,
                                    new_bb_inner_points),
                        Baseline(new_baseline_start, new_baseline_end), None))

        new_dataset.add_page(Page(page_id + ".jpg_rec", lines))

    # Terminate the progress line.
    print()

    return new_dataset
Exemple #5
0
 def test_empty_raises(self):
     """Constructing a box with no arguments must raise ValueError."""
     self.assertRaises(ValueError, BoundingBox)
Exemple #6
0
 def test_starts_and_height_raises(self):
     """Start coordinates with only a height must raise ValueError."""
     self.assertRaises(ValueError, BoundingBox,
                       start_x=start.x, start_y=start.y, height=height)
Exemple #7
0
 def test_start_and_height_raises(self):
     """A start point with only a height must raise ValueError."""
     self.assertRaises(ValueError, BoundingBox, start=start, height=height)
Exemple #8
0
 def test_starts_and_width_raises(self):
     """Start coordinates with only a width must raise ValueError."""
     self.assertRaises(ValueError, BoundingBox,
                       start_x=start.x, start_y=start.y, width=width)
Exemple #9
0
 def test_start_and_width_raises(self):
     """A start point with only a width must raise ValueError."""
     self.assertRaises(ValueError, BoundingBox, start=start, width=width)
Exemple #10
0
 def test_starts_and_end_y_raises(self):
     """Start coordinates with only an end y must raise ValueError."""
     self.assertRaises(ValueError, BoundingBox,
                       start_x=start.x, start_y=start.y, end_y=end.y)
Exemple #11
0
 def test_start_and_end_x_raises(self):
     """A start point with only an end x must raise ValueError."""
     self.assertRaises(ValueError, BoundingBox, start=start, end_x=end.x)
Exemple #12
0
 def test_start_and_end(self):
     """A box built from start and end points exposes both corners."""
     corners = {"start": start, "end": end}
     box = BoundingBox(**corners)
     self.compare_points(box.start, start)
     self.compare_points(box.end, end)
Exemple #13
0
 def test_ends_raises(self):
     """End coordinates without any start must raise ValueError."""
     self.assertRaises(ValueError, BoundingBox, end_x=end.x, end_y=end.y)
Exemple #14
0
 def test_end_raises(self):
     """An end point without any start must raise ValueError."""
     self.assertRaises(ValueError, BoundingBox, end=end)
Exemple #15
0
 def test_start_raises(self):
     """A start point with no end or size must raise ValueError."""
     self.assertRaises(ValueError, BoundingBox, start=start)