Ejemplo n.º 1
0
    def create_line(self):
        """Create a new text line from the points collected by the clicker.

        Nothing happens unless the clicker holds more than two points. The
        baseline is built from every clicked point except the last one.
        Heights are the per-component median of the heights of the existing
        lines, or ``[5, 5]`` when there are no lines yet. The new line is
        wrapped in a placeholder region (id ``'dummy'``), appended to the page
        layout and to ``self.lines``; the interaction state is then cleared
        and the view re-rendered.
        """
        if len(self.clicker.points) <= 2:
            return

        if self.lines:
            existing_heights = np.array(
                [[line.heights[0], line.heights[1]] for line in self.lines])
            heights = np.median(existing_heights, axis=0)
        else:
            heights = [5, 5]

        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from NumPy, so the builtin is used directly.
        baseline = np.array(self.clicker.points[:-1], dtype=float)
        new_line = layout.TextLine(id=None,
                                   baseline=baseline,
                                   heights=heights)
        new_line.polygon = linepp.baseline_to_textline(new_line.baseline,
                                                       new_line.heights)

        # Placeholder region with a degenerate polygon that carries the line.
        dummy_region = layout.RegionLayout(id='dummy',
                                           polygon=[[-1.0, -1.0],
                                                    [-1.0, -1.0],
                                                    [-1.0, -1.0]])
        dummy_region.lines.append(new_line)
        self.page_layout.regions.append(dummy_region)
        self.lines.append(new_line)
        self.clear_interaction()
        self.render()
Ejemplo n.º 2
0
    def sync_all_heights(self):
        """Set the heights of every line to the median heights of all lines.

        The median is taken separately over the first and the second height
        component of ``self.lines``. Each line's polygon is rebuilt from its
        baseline and the new heights, and the view is re-rendered afterwards
        (also when there are no lines).
        """
        if self.lines:
            median_asc = np.median(
                np.array([line.heights[0] for line in self.lines]))
            median_des = np.median(
                np.array([line.heights[1] for line in self.lines]))
            for line in self.lines:
                # A fresh list per line: the lines must not share one mutable
                # heights object, otherwise an in-place edit of one line's
                # heights would silently change all of them.
                line.heights = [median_asc, median_des]
                line.polygon = linepp.baseline_to_textline(
                    line.baseline, line.heights)

        self.render()
Ejemplo n.º 3
0
    def update_selected_lines(self, bs=0, asc=0, dsc=0, start=0, end=0):
        """Apply offsets to every selected line and re-render.

        :param bs: offset added to the second column of the whole baseline
        :param asc: offset added to the first height component
        :param dsc: offset added to the second height component
        :param start: offset added to the first column of the first
            baseline point
        :param end: offset added to the first column of the last
            baseline point

        All offsets are converted with ``float()``. The baseline array is
        modified in place and the polygon is rebuilt from the updated
        baseline and heights.
        """
        for index in self.selected_lines:
            target = self.lines[index]

            new_asc = target.heights[0] + float(asc)
            new_dsc = target.heights[1] + float(dsc)
            target.heights = [new_asc, new_dsc]

            target.baseline[:, 1] += float(bs)
            target.baseline[0, 0] += float(start)
            target.baseline[-1, 0] += float(end)

            target.polygon = linepp.baseline_to_textline(
                target.baseline, target.heights)

        self.render()
    def postprocess(self, region):
        """Run the configured post-processing steps on a region's lines.

        A region without lines is returned untouched. Otherwise, depending on
        the instance settings, baselines are stretched, resampled and heights
        are taken from the region; finally the polygon of every line is
        rebuilt from its baseline and heights.

        :param region: region whose ``lines`` are processed in place
        :return: the same ``region`` object
        """
        if not region.lines:
            return region

        stretch = self.stretch_lines
        if stretch == 'max' or stretch > 0:
            self.stretch_baselines(region)

        if self.resample_lines:
            self.resample_baselines(region)

        if self.heights_from_regions:
            self.get_heights_from_regions(region)

        # Polygons are rebuilt last so they reflect all adjustments above.
        for text_line in region.lines:
            text_line.polygon = helpers.baseline_to_textline(
                text_line.baseline, text_line.heights)

        return region
Ejemplo n.º 5
0
 def sync_line_heights(self):
     """Set the heights of the selected lines to the selection's median.

     The median is taken separately over the first and the second height
     component of the lines indexed by ``self.selected_lines``. Each
     selected line's polygon is rebuilt from its baseline and the new
     heights, and the view is re-rendered afterwards (also when nothing is
     selected).
     """
     selected = [self.lines[l_num] for l_num in self.selected_lines]
     if selected:
         median_asc = np.median(
             np.array([line.heights[0] for line in selected]))
         median_des = np.median(
             np.array([line.heights[1] for line in selected]))
         for line in selected:
             # A fresh list per line: the lines must not share one mutable
             # heights object, otherwise an in-place edit of one line's
             # heights would silently change all of them.
             line.heights = [median_asc, median_des]
             line.polygon = linepp.baseline_to_textline(
                 line.baseline, line.heights)
     self.render()
Ejemplo n.º 6
0
    def process_page(self, img, page_layout: PageLayout):
        """Detect regions and/or lines on a page and post-process the lines.

        Depending on the instance flags this method:

        * runs the engine for each orientation, creates regions from the
          detected polygons (``detect_regions``) and assigns detected lines to
          regions (``detect_lines``);
        * merges the lines inside each region (``merge_lines``);
        * re-estimates the heights of every line and rebuilds its polygon
          (``adjust_lines``).

        :param img: page image passed to the engine
        :param page_layout: layout that is updated in place
        :return: the same ``page_layout`` object
        """
        if self.detect_regions or self.detect_lines:
            # Drop whatever is about to be re-detected.
            if self.detect_regions:
                page_layout.regions = []
            if self.detect_lines:
                for stale_region in page_layout.regions:
                    stale_region.lines = []

            orientations = [0, 1, 3] if self.multi_orientation else [0]

            for rot in orientations:
                detected = []
                p_list, b_list, h_list, t_list = self.engine.detect(img,
                                                                    rot=rot)

                if self.detect_regions:
                    for index, polygon in enumerate(p_list):
                        # Regions from rotated passes carry the rotation in
                        # their id.
                        if rot > 0:
                            region_id = 'r{:03d}_{}'.format(index, rot)
                        else:
                            region_id = 'r{:03d}'.format(index)
                        detected.append(RegionLayout(region_id, polygon))

                if self.detect_lines:
                    if not self.detect_regions:
                        detected = page_layout.regions
                    # The return value is not stored; presumably the helper
                    # fills ``region.lines`` in place - verify against helpers.
                    for target in detected:
                        helpers.assign_lines_to_region(b_list, h_list, t_list,
                                                       target)

                if self.detect_regions:
                    page_layout.regions += detected

        if self.merge_lines:
            for region in page_layout.regions:
                baselines = [line.baseline for line in region.lines]
                heights = [line.heights for line in region.lines]
                merged_b, merged_h = helpers.merge_lines(baselines, heights)
                merged_t = [
                    helpers.baseline_to_textline(b, h)
                    for b, h in zip(merged_b, merged_h)
                ]
                region.lines = []
                helpers.assign_lines_to_region(merged_b, merged_h, merged_t,
                                               region)

        if self.adjust_lines:
            # NOTE(review): the result of get_maps() is sliced with
            # [:, :, :2] and then unpacked into two names - confirm the
            # return type of this engine's get_maps().
            heights_map, ds = self.engine.get_maps(img)[:, :, :2]
            for line in page_layout.lines_iterator():
                resampled = helpers.resample_baselines([line.baseline],
                                                       num_points=40)
                line.heights = self.engine.get_heights(heights_map, ds,
                                                       resampled[0])
                line.polygon = helpers.baseline_to_textline(
                    line.baseline, line.heights)

        return page_layout
Ejemplo n.º 7
0
    def parse(self, out_map, downsample):
        """Parse the network output map into baselines, heights and textlines.

        :param out_map: array of baseline and endpoint probabilities with
        channels: ascender height, descender height, baselines, baseline
        endpoints, region boundaries. Negative values in channel 4 are set to
        zero in place.
        :param downsample: downsample factor to apply to layout coords
        :return: tuple ``(b_list, h_list, t_list)`` - baselines as float
        arrays of ``(x, y)`` points, ``[ascender, descender]`` heights and the
        textline polygons built from them, ordered by the leftmost x
        coordinate of each baseline
        """
        b_list = []
        h_list = []
        # 5 rows x 3 columns structuring element used to dilate detections
        # before labelling.
        structure = np.ones((5, 3), dtype=int)

        out_map[:, :, 4][out_map[:, :, 4] < 0] = 0
        baselines_map = ndimage.convolve(out_map[:, :, 2], np.ones((3, 3)) / 9)
        baselines_map = nonmaxima_suppression(baselines_map,
                                              element_size=(7, 1))
        # Subtract the endpoint channel before thresholding.
        baselines_map = (baselines_map -
                         out_map[:, :, 3]) > self.line_detection_threshold
        # The top-level ndimage functions are used; the ``morphology`` and
        # ``measurements`` sub-namespaces are deprecated in SciPy.
        heights_map = ndimage.grey_dilation(out_map[:, :, :2],
                                            size=(7, 1, 1))

        baselines_map_dilated = ndimage.binary_dilation(baselines_map,
                                                        structure=structure)
        baselines_img, num_detections = ndimage.label(
            baselines_map_dilated, structure=np.ones([3, 3]))
        # Keep labels only on the original (non-dilated) detections.
        baselines_img *= baselines_map
        inds = np.where(baselines_img > 0)
        labels = baselines_img[inds[0], inds[1]]

        for i in range(1, num_detections + 1):
            bl_inds, = np.where(labels == i)
            if len(bl_inds) > 5:
                # go from matrix indexing to image indexing
                pos_all = np.stack([inds[1][bl_inds], inds[0][bl_inds]],
                                   axis=1)

                # one point per distinct x, ordered by x
                _, indices = np.unique(pos_all[:, 0], return_index=True)
                pos = pos_all[indices]
                x_index = np.argsort(pos[:, 0])
                pos = pos[x_index]

                # between 2 and 10 evenly spaced points per baseline
                target_point_count = min(10, pos.shape[0] // 10)
                target_point_count = max(target_point_count, 2)
                selected_pos = np.linspace(0, (pos.shape[0]) - 1,
                                           target_point_count).astype(np.int32)

                pos = pos[selected_pos, :]
                pos[0, 0] -= 2  # compensate for endpoint detection overlaps
                pos[-1, 0] += 2

                heights_pred = heights_map[inds[0][bl_inds],
                                           inds[1][bl_inds], :]

                heights_pred = np.maximum(heights_pred, 0)
                heights_pred = np.asarray([
                    np.percentile(heights_pred[:, 0], 50),
                    np.percentile(heights_pred[:, 1], 50)
                ])

                # ``np.float`` has been removed from NumPy; the builtin
                # ``float`` is the same dtype.
                b_list.append(downsample * pos.astype(float))
                h_list.append([
                    downsample * heights_pred[0], downsample * heights_pred[1]
                ])

        # sort lines from LEFT to RIGHT; sorting indices with a key keeps the
        # result deterministic and never compares the arrays themselves
        order = sorted(range(len(b_list)),
                       key=lambda k: np.amin(b_list[k][:, 0]))
        b_list = [b_list[k] for k in order]
        h_list = [h_list[k] for k in order]

        t_list = [
            helpers.baseline_to_textline(b, h) for b, h in zip(b_list, h_list)
        ]

        return b_list, h_list, t_list
Ejemplo n.º 8
0
    def process_page(self, img, page_layout: PageLayout):
        """Detect regions and/or lines on a page and refine the result.

        Depending on the instance flags this method:

        * runs the engine for each orientation, creates regions from the
          detected polygons (``detect_regions``) and assigns detected lines to
          regions (``detect_lines``);
        * repeatedly merges the lines of each region until the line count
          stops changing (``merge_lines``);
        * replaces the lines of each region by straight lines detected inside
          the region polygon (``detect_straight_lines_in_regions``);
        * re-estimates line heights (``adjust_heights``) and refines baselines
          (``adjust_baselines``), rebuilding the line polygons each time.

        :param img: page image passed to the engine
        :param page_layout: layout that is updated in place
        :return: the same ``page_layout`` object
        """
        if self.detect_regions or self.detect_lines:
            # Drop whatever is about to be re-detected.
            if self.detect_regions:
                page_layout.regions = []
            if self.detect_lines:
                for stale_region in page_layout.regions:
                    stale_region.lines = []

            orientations = [0, 1, 3] if self.multi_orientation else [0]

            for rot in orientations:
                found = []
                p_list, b_list, h_list, t_list = self.engine.detect(img,
                                                                    rot=rot)
                if self.detect_regions:
                    for index, polygon in enumerate(p_list):
                        # Regions from rotated passes carry the rotation in
                        # their id.
                        if rot > 0:
                            region_id = 'r{:03d}_{}'.format(index, rot)
                        else:
                            region_id = 'r{:03d}'.format(index)
                        found.append(RegionLayout(region_id, polygon))
                if self.detect_lines:
                    if not self.detect_regions:
                        found = page_layout.regions
                    found = helpers.assign_lines_to_regions(
                        b_list, h_list, t_list, found)
                if self.detect_regions:
                    page_layout.regions += found

        if self.merge_lines:
            for region in page_layout.regions:
                # Merge until a pass leaves the number of lines unchanged.
                stable = False
                while not stable:
                    count_before = len(region.lines)
                    merged_b, merged_h = helpers.merge_lines(
                        [line.baseline for line in region.lines],
                        [line.heights for line in region.lines])
                    merged_t = [
                        helpers.baseline_to_textline(b, h)
                        for b, h in zip(merged_b, merged_h)
                    ]
                    region.lines = []
                    region = helpers.assign_lines_to_regions(
                        merged_b, merged_h, merged_t, [region])[0]
                    stable = len(region.lines) == count_before

        # The maps are computed once and shared by the three steps below.
        needs_maps = (self.detect_straight_lines_in_regions
                      or self.adjust_heights or self.adjust_baselines)
        if needs_maps:
            maps, ds = self.engine.parsenet.get_maps_with_optimal_resolution(
                img)

        if self.detect_straight_lines_in_regions:
            for region in page_layout.regions:
                straight_b, straight_h, straight_t = detect_lines_in_region(
                    region.polygon, maps, ds)
                region.lines = []
                region = helpers.assign_lines_to_regions(
                    straight_b, straight_h, straight_t, [region])[0]

        if self.adjust_heights:
            for line in page_layout.lines_iterator():
                resampled = helpers.resample_baselines([line.baseline],
                                                       num_points=40)
                line.heights = self.engine.get_heights(maps, ds, resampled[0])
                line.polygon = helpers.baseline_to_textline(
                    line.baseline, line.heights)

        if self.adjust_baselines:
            crop_engine = cropper.EngineLineCropper(line_height=32,
                                                    poly=0,
                                                    scale=1)
            for line in page_layout.lines_iterator():
                line.baseline = refine_baseline(line.baseline, line.heights,
                                                maps, ds, crop_engine)
                line.polygon = helpers.baseline_to_textline(
                    line.baseline, line.heights)

        return page_layout
Ejemplo n.º 9
0
def detect_lines_in_region(region, detection_maps, downsample, line_detection_threshold=0.2):
    """
    Detects straight textlines inside a single region.

    The region is cropped from the detection maps, rotated according to the
    dominant direction of the thresholded baseline detections, and baselines
    are located as peaks of the row-wise projection of the baseline channel.

    :param region: numpy array of polygon points
    :param detection_maps: channel 0: ascender heights, channel 1: descender heights, channel 2: baseline detections,
    channel 3: baseline endpoints, channel 4: region detections
    :param downsample: factor between region coordinates and detection map coordinates; region points are divided
    by it on the way in and results are multiplied by it on the way out
    :param line_detection_threshold: threshold applied to the baseline detection channel
    :return: list of baselines, list of heights, list of textline polygons
    """

    # Region polygon in detection-map coordinates, kept one pixel away from the map border.
    region_polygon = np.stack([
        np.clip(region[:, 0] / downsample, 1, detection_maps.shape[1] - 2),
        np.clip(region[:, 1] / downsample, 1, detection_maps.shape[0] - 2)],
        axis=1
    )
    # Bounding box of the polygon, grown by one pixel on each side, and the matching crop of the maps.
    region_bb_lt = np.round(np.amin(region_polygon, axis=0) - 1).astype(np.int32)
    region_bb_rb = np.round(np.amax(region_polygon, axis=0) + 1).astype(np.int32)
    region_maps = detection_maps[region_bb_lt[1]:region_bb_rb[1], region_bb_lt[0]:region_bb_rb[0]]

    # Move the polygon into the coordinate frame of the crop.
    region_polygon -= region_bb_lt[np.newaxis]

    polygon_mask = np.zeros(region_maps.shape[0:2], dtype=np.float32)

    # Zero out everything in the crop that lies outside the polygon.
    cv2.fillPoly(polygon_mask, [np.round(region_polygon).astype(np.int32)], 1.0)
    region_maps = region_maps * polygon_mask[:, :, np.newaxis]

    # NOTE(review): unpacking two values matches the OpenCV 4.x return signature of findContours - confirm the
    # supported OpenCV versions.
    contours, hierarchy = cv2.findContours((region_maps[:, :, 2] > line_detection_threshold).astype(np.uint8),
                                          cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Accumulate the scatter matrix of the contour points, each contour centred on its own mean.
    cov_mat = np.zeros([2, 2])
    for contour in contours:
        contour = contour[:, 0]
        centralized = contour - contour.mean(axis=0)
        cov_mat += centralized.T.dot(centralized)
    eig_val, eig_vec = np.linalg.eig(cov_mat)
    # NOTE(review): np.linalg.eig documents the eigenvectors as the COLUMNS of its second return value, while
    # the next line selects a ROW - confirm this is intended together with the sign of the angle used below.
    direction = eig_vec[np.argmax(eig_val)]
    # Make the direction point towards positive x.
    if direction[0] < 0:
        direction *= -1
    rad_angle = np.arctan2(direction[1], direction[0])

    # Rotation by -rad_angle (converted to degrees) around half of the crop's shape.
    # NOTE(review): the centre is built from shape[0:2], i.e. in (rows, cols) order - confirm against the
    # point order expected by cv2.getRotationMatrix2D.
    T = cv2.getRotationMatrix2D(tuple(np.asarray(region_maps.shape[0:2]) * 0.5), -rad_angle / np.pi * 180, 1)
    # Extend to a 3x3 homogeneous matrix so it can be composed and inverted.
    T = np.concatenate((T, np.array([[0, 0, 1]])), axis=0)

    transformed_polygon = cv2.transform(region_polygon[np.newaxis], T[:2, :])
    transformed_polygon = transformed_polygon[0]

    # Bounding box of the rotated polygon.
    polygon_lt = np.amin(transformed_polygon, axis=0)
    polygon_rb = np.amax(transformed_polygon, axis=0)

    # Translation by the negated top-left corner of the rotated polygon, composed with the rotation.
    # NOTE(review): np.dot(T, M_trans) applies the translation before the rotation - confirm the intended order.
    M_trans = np.array([
        [1, 0, -polygon_lt[0]],
        [0, 1, -polygon_lt[1]],
        [0, 0, 1]
    ])
    T = np.dot(T, M_trans)
    output_size = tuple((polygon_rb - polygon_lt + 1).astype(int))

    # Warp the first three channels and the mask into the rotated frame.
    region_map = cv2.warpAffine(region_maps[:, :, :3], T[:2, :], output_size)
    polygon_mask = cv2.warpAffine(polygon_mask, T[:2, :], output_size)

    # Suppress weak baseline responses, then project the baseline channel onto the vertical axis.
    region_map[:, :, 2][region_map[:, :, 2] < line_detection_threshold] = 0
    detection_projections = np.sum(region_map[:, :, 2], axis=1) / output_size[0]

    # Mean of (ascender + descender) inside the mask; 0.7 of it (at least 1) is the minimal peak distance.
    mean_height = np.average((region_map[:, :, 0] + region_map[:, :, 1])[polygon_mask > 0])
    # find_peaks is defined outside this block; its first result is used as integer row indices below and the
    # second as the y coordinates of the baselines.
    baselines_y, baselines_y_float = find_peaks(detection_projections, min_distance=np.maximum(0.7*mean_height, 1))

    if baselines_y.shape[0] == 0:
        return [], [], []

    baselines_x0 = np.argmax(polygon_mask, axis=1)[baselines_y]  # first x of polygon mask
    baselines_x1 = (polygon_mask.shape[1] - np.argmax(polygon_mask[:, ::-1], axis=1))[baselines_y]  # last x of polygon mask

    # One two-point baseline per peak: the two stacks combine into (x0, y) and (x1, y) for every row.
    baselines = np.stack((
        np.stack((baselines_x0, baselines_x1), axis=1),
        np.stack((baselines_y_float, baselines_y_float), axis=1)),
        axis=2
    )
    # Map the baselines back through the inverse transform, undo the crop offset and the downsampling.
    baselines = cv2.transform(baselines.astype(np.float32), np.linalg.inv(T)[:2, :])
    baselines = (baselines + region_bb_lt[np.newaxis] + 1) * downsample

    b_list = [b for b in baselines]

    # Heights: 70th percentile of the ascender / descender channel over the pixels of the baseline row whose
    # baseline response exceeds the threshold.
    h_list = []
    for by in baselines_y:
        asc_line = region_map[by, :, 0]
        asc = np.percentile(asc_line[region_map[by, :, 2] > line_detection_threshold], 70)
        des_line = region_map[by, :, 1]
        des = np.percentile(des_line[region_map[by, :, 2] > line_detection_threshold], 70)
        h_list.append([asc * downsample, des * downsample])

    t_list = [helpers.baseline_to_textline(b, h) for b, h in zip(b_list, h_list)]

    return b_list, h_list, t_list
Ejemplo n.º 10
0
    def detect_lines(self, img, region):
        """Performs simple line extraction in single text region using thresholding,
        correlation and connected component analysis.

        Reads ``ignored_border_pixels``, ``block_size``, ``adaptive_threshold``
        and ``minimum_length`` from the instance.

        :param img: input image array
        :param region: target region polygon
        :return: tuple ``(baselines, heights, textlines)``; three empty lists
            when the clipped bounding box of the region is degenerate
        """

        baselines_list = []
        heights_list = []

        # Bounding box of the region, clipped to the image.
        x1 = np.clip(np.amin(region[:, 0].astype(np.int32)), 0, img.shape[1])
        x2 = np.clip(np.amax(region[:, 0].astype(np.int32)), 0, img.shape[1])
        y1 = np.clip(np.amin(region[:, 1].astype(np.int32)), 0, img.shape[0])
        y2 = np.clip(np.amax(region[:, 1].astype(np.int32)), 0, img.shape[0])

        if x1 == x2 or y1 == y2:
            return [], [], []

        column_width = x2 - x1
        column_height = y2 - y1

        # Region mask cropped to the bounding box and eroded horizontally.
        img_mask = polygon2mask(img.shape[0:2], np.flip(region, axis=1))
        img_mask = img_mask[y1:y2, x1:x2]
        img_mask = binary_erosion(img_mask,
                                  structure=np.ones(
                                      (1, 2 * self.ignored_border_pixels + 1)))

        # Binarise the grey-level crop; True marks pixels the adaptive
        # threshold set to 0.
        img_crop = img[y1:y2, x1:x2, :]
        img_crop = img_crop.mean(axis=2).astype(np.uint8)
        img_crop = cv2.adaptiveThreshold(img_crop, 255,
                                         cv2.ADAPTIVE_THRESH_MEAN_C,
                                         cv2.THRESH_BINARY, self.block_size,
                                         self.adaptive_threshold) == 0

        img_crop = img_crop * img_mask

        # Top-level ``ndimage.label``: the ``measurements`` namespace is
        # deprecated in SciPy.
        img_crop_labeled, _ = ndimage.label(img_crop)
        proj = np.sum(img_crop, axis=1)
        corr = np.correlate(proj, proj, mode='full')[proj.shape[0]:]

        # The first peak of the autocorrelation is taken as the line period.
        corr_peaks = signal.find_peaks(corr, prominence=0, distance=1)[0]
        line_period = float(corr_peaks[0]) if len(corr_peaks) > 0 else 1

        # Positive part of the negated projection derivative.
        target_signal = -np.diff(proj)
        target_signal[target_signal < 0] = 0

        baseline_coords = signal.find_peaks(target_signal,
                                            distance=int(
                                                round(0.85 * line_period)))[0]
        region_polygon = shapely.geometry.polygon.Polygon(region)
        used_inds = set()

        for baseline_coord in baseline_coords[::-1]:
            valid_baseline = True
            # Probe the row 10 px above the baseline candidate. The index is
            # clamped at 0: a negative index would wrap around and read a row
            # from the bottom of the crop instead.
            probe_row = max(baseline_coord - 10, 0)
            # The first unique value is dropped (presumably the background
            # label 0 - verify for rows without background pixels).
            matching_objects = np.unique(img_crop_labeled[probe_row, :])[1:]
            if len(matching_objects) == 0:
                continue

            # A baseline is rejected when any of its components was already
            # seen for a previously processed baseline.
            for ind in matching_objects:
                if ind in used_inds:
                    valid_baseline = False
                used_inds.add(ind)

            # Walk up and down in steps of 3 px until a row no longer
            # contains any of the matched components.
            for yb1 in range(baseline_coord, 0, -3):
                line_inds_to_check = img_crop_labeled[yb1, :]
                if not np.any(
                        np.intersect1d(matching_objects,
                                       line_inds_to_check)):
                    break

            for yb2 in range(baseline_coord, column_height, 3):
                line_inds_to_check = img_crop_labeled[yb2, :]
                if not np.any(
                        np.intersect1d(matching_objects,
                                       line_inds_to_check)):
                    break

            xb1, xb2 = 0, column_width

            if xb2 - xb1 < self.minimum_length:
                valid_baseline = False

            # Horizontal line across the bounding box, cut by the region.
            line = shapely.geometry.LineString(
                [[x1 + xb1, y1 + baseline_coord],
                 [x1 + xb2, y1 + baseline_coord]])
            intersection = region_polygon.intersection(line)
            if intersection.geom_type == 'LineString' and valid_baseline:
                baselines_list.append(
                    np.round(np.asarray(list(
                        intersection.coords[:]))).astype(np.int16))
                heights_list.append(
                    [baseline_coord - yb1, yb2 - baseline_coord])

        textlines_list = [
            helpers.baseline_to_textline(baseline, heights)
            for baseline, heights in zip(baselines_list, heights_list)
        ]

        return baselines_list, heights_list, textlines_list
Ejemplo n.º 11
0
    def detect(self, image, rot=0):
        """Uses parsenet to find lines and region separators, clusters vertically
        close lines by computing penalties and postprocesses the resulting
        regions.

        :param image: input image array; rotated with ``np.rot90`` when
            ``rot > 0``
        :param rot: number of counter-clockwise 90degree rotations (0 <= n <= 3)
        :return: tuple ``(p_list, b_list, h_list, t_list)`` of region polygons,
            baselines, heights and textline polygons; four empty lists when no
            baseline was found
        """
        if rot > 0:
            image = np.rot90(image, k=rot)

        # update downsample factor if rot is 0, else assume that the same page
        # was already parsed once to save time during downsample estimation
        maps, ds = self.get_maps(image, update_downsample=(rot == 0))
        b_list, h_list, layout_separator_map = self.parse(maps, ds)
        if not b_list:
            return [], [], [], []
        t_list = [
            helpers.baseline_to_textline(b, h) for b, h in zip(b_list, h_list)
        ]

        # cluster the lines into regions
        clusters_array = self.cluster_lines(t_list, layout_separator_map, ds)
        regions_textlines_tmp = []
        polygons_tmp = []
        for i in range(np.amax(clusters_array) + 1):
            region_textlines = [
                textline
                for textline, cluster in zip(t_list, clusters_array)
                if cluster == i
            ]
            regions_textlines_tmp.append(region_textlines)
            polygons_tmp.append(
                helpers.region_from_textlines(region_textlines))

        # remove overlaps while minimizing textline modifications
        polygons_tmp = self.filter_polygons(polygons_tmp,
                                            regions_textlines_tmp)

        # up to this point, polygons can be any geometry that comes from alpha_shape
        p_list = []
        for region_poly in polygons_tmp:
            if region_poly.geom_type == 'MultiPolygon':
                # ``.geoms`` is used because multi-part geometries are not
                # directly iterable in Shapely 2.
                for poly in region_poly.geoms:
                    p_list.append(poly.simplify(5))
            elif region_poly.geom_type == 'Polygon':
                p_list.append(region_poly.simplify(5))

        b_list, h_list, t_list = helpers.order_lines_vertical(
            b_list, h_list, t_list)
        p_list = [np.array(poly.exterior.coords) for poly in p_list]

        # Map coordinates from the rotated image back to the input image.
        if rot in (1, 3):
            # Swap x and y, then mirror one axis: column 0 against the rotated
            # image's height for rot == 1, column 1 against its width for
            # rot == 3.
            b_list = [np.flip(b, axis=1) for b in b_list]
            t_list = [np.flip(t, axis=1) for t in t_list]
            p_list = [np.flip(p, axis=1) for p in p_list]
            if rot == 1:
                column, extent = 0, image.shape[0]
            else:
                column, extent = 1, image.shape[1]
            for coords in b_list + t_list + p_list:
                coords[:, column] = extent - coords[:, column]
        elif rot == 2:
            shape_array = np.asarray(image.shape[:2][::-1])
            b_list = [shape_array - b for b in b_list]
            t_list = [shape_array - t for t in t_list]
            p_list = [shape_array - p for p in p_list]

        return p_list, b_list, h_list, t_list