def test_starts_and_width_height(self):
    """A box built from start_x/start_y plus width/height has the expected corners.

    The resulting ``start`` and ``end`` are checked against the ``start`` and
    ``end`` fixtures with ``self.compare_points``.
    """
    box = BoundingBox(
        start_x=start.x,
        start_y=start.y,
        width=width,
        height=height,
    )
    self.compare_points(box.start, start)
    self.compare_points(box.end, end)
def get_bounding_box_and_baseline(line_tag):
    """Parse the bounding box and baseline from a line tag's ``title`` attribute.

    The title is split on ``;`` and the ``bbox`` and ``baseline`` parts are
    looked up with ``get_part``. The baseline is expressed relative to the
    bottom edge of the bounding box (slope and offset), so it is converted
    here into absolute start/end coordinates.

    Args:
        line_tag: Element exposing ``attrib["title"]``.

    Returns:
        A ``(bbox, baseline)`` tuple. ``bbox`` is ``None`` when the title has
        no ``bbox`` part. ``baseline`` is ``None`` when the title has no
        ``baseline`` part, or when there is no bounding box to anchor it to.
    """
    title_parts = line_tag.attrib["title"].split(";")

    bbox = None
    bbox_part = get_part("bbox", title_parts)
    if bbox_part is not None:
        # Token 0 is skipped (presumably the "bbox" keyword itself); tokens
        # 1-4 are read as start_x, start_y, end_x, end_y.
        coords = bbox_part.split()
        bbox = BoundingBox(start_x=int(coords[1]),
                           start_y=int(coords[2]),
                           end_x=int(coords[3]),
                           end_y=int(coords[4]))

    baseline = None
    baseline_part = get_part("baseline", title_parts)
    # The baseline is computed from the bounding box corners, so without a
    # bounding box it cannot be resolved; previously this case dereferenced
    # ``None`` and raised AttributeError.
    if bbox is not None and baseline_part is not None:
        baseline_tokens = baseline_part.split()
        baseline_slope = float(baseline_tokens[1])
        # The offset may be written as a float; truncate it to whole pixels.
        baseline_offset = int(float(baseline_tokens[2]))

        bbox_width = bbox.end.x - bbox.start.x
        baseline_y_start = bbox.end.y + baseline_offset
        baseline_y_end = int(
            bbox.end.y + baseline_offset + bbox_width * baseline_slope)

        baseline = Baseline(start_x=bbox.start.x,
                            start_y=baseline_y_start,
                            end_x=bbox.end.x,
                            end_y=baseline_y_end)

    return bbox, baseline
def test_starts_and_ends(self):
    """A box built from start_x/start_y and end_x/end_y has the expected corners.

    The resulting ``start`` and ``end`` are checked against the ``start`` and
    ``end`` fixtures with ``self.compare_points``.
    """
    box = BoundingBox(
        start_x=start.x,
        start_y=start.y,
        end_x=end.x,
        end_y=end.y,
    )
    self.compare_points(box.start, start)
    self.compare_points(box.end, end)
def process_dataset(dataset, log_path):
    """Build a new dataset whose line geometry is mapped through inverted page transformations.

    For every page id in ``dataset.pages`` a transformation matrix is read
    with ``read_log`` from ``<log_path>/<page id up to its first dot>.jpg.log``
    and inverted. The inverse is applied with ``transform_point`` to each
    line's bounding-box start, end and inner points and to its baseline start
    and end. The transformed lines are stored under the page id suffixed with
    ``.jpg_rec``.

    Progress is printed to stdout on a single, carriage-return-updated line.
    Pages whose matrix cannot be inverted are reported on stdout and skipped.

    Args:
        dataset: Source dataset exposing ``pages`` and ``get_page(page_id)``.
        log_path: Directory containing the per-page ``.jpg.log`` files.

    Returns:
        A new ``Dataset`` holding the transformed pages.
    """
    new_dataset = Dataset()
    total_pages = len(dataset.pages)

    for index, page_id in enumerate(dataset.pages):
        print("\r{current}/{total} ({percentage:.2f} %)".format(
            current=index + 1,
            total=total_pages,
            percentage=100 * float(index + 1) / total_pages), end="")

        transformation = read_log(
            os.path.join(log_path, page_id.split(".")[0] + ".jpg.log"))

        try:
            inversion = np.linalg.inv(transformation)
        except Exception:
            # Best-effort: report the page and move on. ``Exception`` rather
            # than a bare ``except`` so that KeyboardInterrupt/SystemExit
            # still propagate and the run can be aborted.
            print("\nPage ID: {id}".format(id=page_id))
            print("Transformation matrix: {matrix}".format(
                matrix=transformation))
            continue

        page = dataset.get_page(page_id)
        lines = []

        if page.lines is not None:
            for line in page.lines:
                bounding_box = line.bounding_box
                new_bb_start = transform_point(bounding_box.start, inversion)
                new_bb_end = transform_point(bounding_box.end, inversion)

                inner_points = bounding_box.inner_points
                if inner_points is not None:
                    new_bb_inner_points = [
                        transform_point(point, inversion)
                        for point in inner_points
                    ]
                else:
                    new_bb_inner_points = []

                new_baseline_start = transform_point(line.baseline.start,
                                                     inversion)
                new_baseline_end = transform_point(line.baseline.end,
                                                   inversion)

                lines.append(
                    Line(
                        line.text,
                        BoundingBox(new_bb_start, new_bb_end,
                                    new_bb_inner_points),
                        Baseline(new_baseline_start, new_baseline_end),
                        None))

        # Pages without lines are still added, with an empty line list.
        new_dataset.add_page(Page(page_id + ".jpg_rec", lines))

    # Terminate the carriage-return progress line.
    print()
    return new_dataset
def test_empty_raises(self):
    """Constructing a BoundingBox with no arguments raises ValueError."""
    with self.assertRaises(ValueError):
        BoundingBox()
def test_starts_and_height_raises(self):
    """start_x/start_y with only a height (no width) raises ValueError."""
    with self.assertRaises(ValueError):
        BoundingBox(
            start_x=start.x,
            start_y=start.y,
            height=height,
        )
def test_start_and_height_raises(self):
    """A start point with only a height (no width) raises ValueError."""
    with self.assertRaises(ValueError):
        BoundingBox(
            start=start,
            height=height,
        )
def test_starts_and_width_raises(self):
    """start_x/start_y with only a width (no height) raises ValueError."""
    with self.assertRaises(ValueError):
        BoundingBox(
            start_x=start.x,
            start_y=start.y,
            width=width,
        )
def test_start_and_width_raises(self):
    """A start point with only a width (no height) raises ValueError."""
    with self.assertRaises(ValueError):
        BoundingBox(
            start=start,
            width=width,
        )
def test_starts_and_end_y_raises(self):
    """start_x/start_y with only end_y (no end_x) raises ValueError."""
    with self.assertRaises(ValueError):
        BoundingBox(
            start_x=start.x,
            start_y=start.y,
            end_y=end.y,
        )
def test_start_and_end_x_raises(self):
    """A start point with only end_x (no end_y) raises ValueError."""
    with self.assertRaises(ValueError):
        BoundingBox(
            start=start,
            end_x=end.x,
        )
def test_start_and_end(self):
    """A box built from start and end points has the expected corners.

    The resulting ``start`` and ``end`` are checked against the ``start`` and
    ``end`` fixtures with ``self.compare_points``.
    """
    box = BoundingBox(
        start=start,
        end=end,
    )
    self.compare_points(box.start, start)
    self.compare_points(box.end, end)
def test_ends_raises(self):
    """Passing only end_x/end_y (no start information) raises ValueError."""
    with self.assertRaises(ValueError):
        BoundingBox(
            end_x=end.x,
            end_y=end.y,
        )
def test_end_raises(self):
    """Passing only an end point (no start information) raises ValueError."""
    with self.assertRaises(ValueError):
        BoundingBox(end=end)
def test_start_raises(self):
    """Passing only a start point (no end or size) raises ValueError."""
    with self.assertRaises(ValueError):
        BoundingBox(start=start)