Example #1
0
def _add_annotation(annotations,
                    segment,
                    imgid,
                    catid,
                    coords=None,
                    mask=None):
    """Append one COCO-style annotation describing ``segment`` to ``annotations``.

    The outline is parsed from the points of the segment's ``Coords``.
    Segments with fewer than 3 points are reported with a warning and
    skipped (nothing is appended).

    Args:
        annotations: list that receives the new annotation dict (mutated).
        segment: element providing ``id`` and ``get_Coords()``.
        imgid: value stored under ``image_id``.
        catid: value stored under ``category_id``.
        coords: passed on to ``coordinates_of_segment``; only used when
            ``mask`` is given.
        mask: optional 2-d array; if given, the segmentation is the encoded
            mask restricted to the segment's polygon, otherwise it is the
            flattened polygon itself.

    Returns:
        None
    """
    LOG = getLogger('processor.EvaluateSegmentation')
    segment_coords = segment.get_Coords()
    # a missing (or zero) confidence counts as full confidence
    score = segment_coords.get_conf() or 1.0
    polygon = polygon_from_points(segment_coords.points)
    num_points = len(polygon)
    if num_points < 3:
        LOG.warning('ignoring segment "%s" with only %d points', segment.id,
                    num_points)
        return
    # the bounding box is always derived from the polygon parsed above,
    # i.e. before the mask branch below replaces ``polygon``
    bbox = xywh_from_polygon(polygon)
    if mask is not None:
        polygon = coordinates_of_segment(segment, None, coords)
        rows, cols = draw.polygon(polygon[:, 1], polygon[:, 0], mask.shape)
        # pycocotools.mask wants Fortran-contiguous arrays
        masked = np.zeros(mask.shape, dtype=np.uint8, order='F')
        masked[rows, cols] = 1 * mask[rows, cols]
        segmentation = encodeMask(masked)
    else:
        segmentation = np.array(polygon).reshape(1, -1).tolist()
    annotation = {
        # non-standard string-valued in addition to 'id'
        'segment_id': segment.id,
        'image_id': imgid,
        'category_id': catid,
        'segmentation': segmentation,
        'area': Polygon(polygon).area,
        'bbox': [bbox['x'], bbox['y'], bbox['w'], bbox['h']],
        'score': score,
        'iscrowd': 0
    }
    annotations.append(annotation)
Example #2
0
 def sanitize_page(self, page, page_id):
     """Replace the outline of each text region by the hull of its text lines.

     For every TextRegion of ``page``:

     1. draw all of its text lines (filled polygon plus perimeter) into a
        binary mask the size of the page image,
     2. close the mask vertically (maximum filter followed by minimum
        filter) with a window as tall as the median line height,
     3. find the external contours of the result, ignoring contours that
        make up less than 10% of the total contour area or that have
        fewer than 4 points after simplification,
     4. if exactly one contour remains, convert it back with
        ``coordinates_for_segment`` and overwrite the region's coordinates.

     Regions without text lines, with zero contour area, or with more than
     one remaining contour are left unchanged (and reported in the log).

     Args:
         page: page element providing ``get_TextRegion()``.
         page_id: page identifier passed to ``workspace.image_from_page``.
     """
     regions = page.get_TextRegion()
     page_image, page_coords, _ = self.workspace.image_from_page(
         page, page_id)
     for region in regions:
         LOG.info('Sanitizing region "%s"', region.id)
         lines = region.get_TextLine()
         if not lines:
             # without lines there is no height to take the median of
             # (int(np.median([])) would raise on NaN) and nothing to draw
             LOG.warning('Ignoring region "%s" without text lines', region.id)
             continue
         heights = []
         # get labels:
         region_mask = np.zeros((page_image.height, page_image.width), dtype=np.uint8)
         for line in lines:
             line_polygon = coordinates_of_segment(line, page_image, page_coords)
             heights.append(xywh_from_polygon(line_polygon)['h'])
             region_mask[draw.polygon(line_polygon[:, 1],
                                      line_polygon[:, 0],
                                      region_mask.shape)] = 1
             region_mask[draw.polygon_perimeter(line_polygon[:, 1],
                                                line_polygon[:, 0],
                                                region_mask.shape)] = 1
         # estimate scale (at least 1 pixel: a scale of 0 would make the
         # un-padding slice [0:-0] below return an empty mask):
         scale = max(1, int(np.median(np.array(heights))))
         # close labels:
         region_mask = np.pad(region_mask, scale) # protect edges
         region_mask = filters.maximum_filter(region_mask, (scale, 1), origin=0)
         region_mask = filters.minimum_filter(region_mask, (scale, 1), origin=0)
         region_mask = region_mask[scale:-scale, scale:-scale] # unprotect
         # find outer contour (parts):
         contours, _ = cv2.findContours(region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         # determine areas of parts:
         areas = [cv2.contourArea(contour) for contour in contours]
         total_area = sum(areas)
         if not total_area:
             # ignore if too small
             LOG.warning('Zero contour area in region "%s"', region.id)
             continue
         # pick contour and convert to absolute:
         region_polygon = None
         for i, (contour, area) in enumerate(zip(contours, areas)):
             if area / total_area < 0.1:
                 LOG.warning('Ignoring contour %d too small (%d/%d) in region "%s"',
                             i, area, total_area, region.id)
                 continue
             # simplify shape:
             polygon = cv2.approxPolyDP(contour, 2, False)[:, 0, ::] # already ordered x,y
             if len(polygon) < 4:
                 LOG.warning('Ignoring contour %d less than 4 points in region "%s"',
                             i, region.id)
                 continue
             if region_polygon is not None:
                 # a second acceptable contour: the lines do not form a
                 # single connected shape, so give up on this region
                 LOG.error('Skipping region "%s" due to non-contiguous contours',
                           region.id)
                 region_polygon = None
                 break
             region_polygon = coordinates_for_segment(polygon, page_image, page_coords)
         if region_polygon is not None:
             LOG.info('Using new coordinates for region "%s"', region.id)
             region.get_Coords().points = points_from_polygon(region_polygon)
Example #3
0
 def test_xywh_from_polygon(self):
     """``xywh_from_polygon`` must yield the bounding box of a 100x100 square."""
     square = [[100, 100], [200, 100], [200, 200], [100, 200]]
     expected = {'x': 100, 'y': 100, 'w': 100, 'h': 100}
     self.assertEqual(xywh_from_polygon(square), expected)
Example #4
0
    def process(self):
        """Extract page images and region descriptions (type and coordinates) from the workspace.

        Open and deserialize PAGE input files and their respective images,
        then iterate over the element hierarchy down to the region level.

        Get all regions with their types (region element class), sub-types (@type)
        and coordinates relative to the page (which depending on the workflow could
        already be cropped, deskewed, dewarped, binarized etc). Extract the image of
        the (cropped, deskewed, dewarped) page, both in binarized form (if available)
        and non-binarized form. In addition, create a new image with masks for all
        regions, color-coded by type. Create two JSON files with region types and
        coordinates: one (page-wise) in our custom format and one (global) in MS-COCO.

        Regions whose coordinates cannot be turned into a valid polygon are
        reported as errors and left out of all outputs.

        The output file group may be given as a comma-separated list to separate
        these 3 page-level images. Write files as follows:
        * in the first (or only) output file group (directory):
          - ID + '.png': raw image of the (preprocessed) page
          - ID + '.json': region coordinates/classes (custom format)
        * in the second (or first) output file group (directory):
          - ID + '.bin.png': binarized image of the (preprocessed) page, if available
        * in the third (or first) output file group (directory):
          - ID + '.dbg.png': debug image

        In addition, write files for all pages at once:
        * in the third (or first) output file group (directory):
          - output_file_grp + '.coco.json': region coordinates/classes (MS-COCO format)
        * in the current working directory:
          - output_file_grp + '.colordict.json': color definitions (as in PAGE viewer)

        (This is intended for training and evaluation of region segmentation models.)

        Raises:
            Exception: if more than 3 output file groups are given.
        """
        file_groups = self.output_file_grp.split(',')
        if len(file_groups) > 3:
            raise Exception(
                "at most 3 output file grps allowed (raw, [binarized, [mask]] image)"
            )
        if len(file_groups) > 2:
            dbg_image_grp = file_groups[2]
        else:
            dbg_image_grp = file_groups[0]
            LOG.info(
                "No output file group for debug images specified, falling back to output filegrp '%s'",
                dbg_image_grp)
        if len(file_groups) > 1:
            bin_image_grp = file_groups[1]
        else:
            bin_image_grp = file_groups[0]
            LOG.info(
                "No output file group for binarized images specified, falling back to output filegrp '%s'",
                bin_image_grp)
        self.output_file_grp = file_groups[0]

        # COCO: init data structures
        images = list()
        annotations = list()
        categories = list()
        i = 0
        for cat, color in CLASSES.items():
            # COCO format does not allow alpha channel
            color = (int(color[0:2], 16), int(color[2:4],
                                              16), int(color[4:6], 16))
            try:
                supercat, name = cat.split(':')
            except ValueError:
                name = cat
                supercat = ''
            categories.append({
                'id': i,
                'name': name,
                'supercategory': supercat,
                'source': 'PAGE',
                'color': color
            })
            i += 1

        # running annotation ID (across all pages)
        i = 0
        # pylint: disable=attribute-defined-outside-init
        for n, input_file in enumerate(self.input_files):
            file_id = input_file.ID.replace(self.input_file_grp,
                                            self.output_file_grp)
            page_id = input_file.pageId or input_file.ID
            try:
                # separate non-numeric part of page ID to retain the numeric part
                num_page_id = int(page_id.strip(page_id.strip("0123456789")))
            except ValueError:
                # no (contiguous) numeric part: fall back to the file index
                num_page_id = n
            LOG.info("INPUT FILE %i / %s", n, page_id)
            pcgts = page_from_file(self.workspace.download_file(input_file))
            page = pcgts.get_Page()
            ptype = page.get_type()
            metadata = pcgts.get_Metadata()  # ensured by from_file()
            metadata.add_MetadataItem(
                MetadataItemType(
                    type_="processingStep",
                    name=self.ocrd_tool['steps'][0],
                    value=TOOL,
                    Labels=[
                        LabelsType(externalModel="ocrd-tool",
                                   externalId="parameters",
                                   Label=[
                                       LabelType(type_=name,
                                                 value=self.parameter[name])
                                       for name in self.parameter
                                   ])
                    ]))
            page_image, page_coords, page_image_info = self.workspace.image_from_page(
                page,
                page_id,
                feature_filter='binarized',
                transparency=self.parameter['transparency'])
            if page_image_info.resolution != 1:
                dpi = page_image_info.resolution
                if page_image_info.resolutionUnit == 'cm':
                    dpi = round(dpi * 2.54)
            else:
                dpi = None
            file_path = self.workspace.save_image_file(
                page_image,
                file_id,
                self.output_file_grp,
                page_id=page_id,
                mimetype=self.parameter['mimetype'])
            try:
                page_image_bin, _, _ = self.workspace.image_from_page(
                    page,
                    page_id,
                    feature_selector='binarized',
                    transparency=self.parameter['transparency'])
                self.workspace.save_image_file(page_image_bin,
                                               file_id + '.bin',
                                               bin_image_grp,
                                               page_id=page_id)
            except Exception as err:
                # only the "no binarized image" case is tolerated; check the
                # message defensively so that exceptions without a string
                # argument are re-raised as they are
                if (err.args and isinstance(err.args[0], str)
                        and err.args[0].startswith('Found no AlternativeImage')):
                    LOG.warning(
                        'Page "%s" has no binarized images, skipping .bin',
                        page_id)
                else:
                    raise
            page_image_dbg = Image.new(mode='RGBA',
                                       size=page_image.size,
                                       color='#' + CLASSES[''])
            if page.get_Border():
                polygon = coordinates_of_segment(page.get_Border(), page_image,
                                                 page_coords).tolist()
                ImageDraw.Draw(page_image_dbg).polygon(
                    list(map(tuple, polygon)), fill='#' + CLASSES['Border'])
            else:
                page_image_dbg.paste(
                    '#' + CLASSES['Border'],
                    (0, 0, page_image.width, page_image.height))
            regions = dict()
            for name in CLASSES.keys():
                if not name or name == 'Border' or ':' in name:
                    # no subtypes here
                    continue
                regions[name] = getattr(page, 'get_' + name)()
            description = {'angle': page.get_orientation()}
            Neighbor = namedtuple('Neighbor', ['id', 'poly', 'type'])
            neighbors = []
            for rtype, rlist in regions.items():
                for region in rlist:
                    if rtype in ['TextRegion', 'ChartRegion', 'GraphicRegion']:
                        subrtype = region.get_type()
                    else:
                        subrtype = None
                    polygon = coordinates_of_segment(region, page_image,
                                                     page_coords)
                    polygon2 = polygon.reshape(1, -1).tolist()
                    polygon = polygon.tolist()
                    xywh = xywh_from_polygon(polygon)
                    # validate coordinates and check intersection with neighbours
                    # (which would melt into another in the mask image):
                    try:
                        poly = Polygon(polygon)
                    except ValueError as err:
                        # no polygon object exists in this case, so none of
                        # the checks below can be applied: skip right away
                        LOG.error('Page "%s" region "%s" %s', page_id,
                                  region.id, err)
                        continue
                    reason = ''
                    if not poly.is_valid:
                        reason = explain_validity(poly)
                    elif poly.is_empty:
                        reason = 'is empty'
                    elif poly.bounds[0] < 0 or poly.bounds[1] < 0:
                        reason = 'is negative'
                    elif poly.length < 4:
                        # NOTE(review): poly.length is the perimeter, not the
                        # number of points - confirm the message is intended
                        reason = 'has too few points'
                    if reason:
                        LOG.error('Page "%s" region "%s" %s', page_id,
                                  region.id, reason)
                        continue
                    poly_prep = prep(poly)
                    for neighbor in neighbors:
                        if (rtype == neighbor.type
                                and poly_prep.intersects(neighbor.poly)
                                and poly.intersection(neighbor.poly).area > 0):
                            LOG.warning('Page "%s" region "%s" intersects neighbour "%s" (IoU: %.3f)',
                                        page_id, region.id, neighbor.id,
                                        poly.intersection(neighbor.poly).area / \
                                        poly.union(neighbor.poly).area)
                        elif (rtype != neighbor.type
                              and poly_prep.within(neighbor.poly)):
                            LOG.warning(
                                'Page "%s" region "%s" within neighbour "%s" (IoU: %.3f)',
                                page_id, region.id, neighbor.id,
                                poly.area / neighbor.poly.area)
                    neighbors.append(Neighbor(region.id, poly, rtype))
                    area = poly.area
                    description.setdefault('regions', []).append({
                        'type': rtype,
                        'subtype': subrtype,
                        'coords': polygon,
                        'area': area,
                        'features': page_coords['features'],
                        'DPI': dpi,
                        'region.ID': region.id,
                        'page.ID': page_id,
                        'page.type': ptype,
                        'file_grp': self.input_file_grp,
                        'METS.UID': self.workspace.mets.unique_identifier
                    })
                    # draw region:
                    ImageDraw.Draw(page_image_dbg).polygon(
                        list(map(tuple, polygon)),
                        fill='#' + CLASSES[(rtype + ':' +
                                            subrtype) if subrtype else rtype])
                    # COCO: add annotations
                    i += 1
                    annotations.append({
                        'id': i,
                        'image_id': num_page_id,
                        # prefer the category of the subtype, otherwise
                        # use the category of the region type itself
                        'category_id': next(
                            (cat['id']
                             for cat in categories if cat['name'] == subrtype),
                            next((cat['id'] for cat in categories
                                  if cat['name'] == rtype))),
                        'segmentation': polygon2,
                        'area': area,
                        'bbox': [xywh['x'], xywh['y'], xywh['w'], xywh['h']],
                        'iscrowd': 0
                    })

            self.workspace.save_image_file(page_image_dbg,
                                           file_id + '.dbg',
                                           dbg_image_grp,
                                           page_id=page_id,
                                           mimetype=self.parameter['mimetype'])
            self.workspace.add_file(
                ID=file_id + '.json',
                file_grp=dbg_image_grp,
                pageId=page_id,
                local_filename=file_path.replace(
                    MIME_TO_EXT[self.parameter['mimetype']], '.json'),
                mimetype='application/json',
                content=json.dumps(description))

            # COCO: add image
            images.append({
                # COCO does not allow string identifiers:
                # -> use numerical part of page_id
                'id': num_page_id,
                # all exported coordinates are relative to the cropped page:
                # -> use that for reference (instead of original page.imageFilename)
                'file_name': file_path,
                # -> use its size (instead of original page.imageWidth/page.imageHeight)
                'width': page_image.width,
                'height': page_image.height
            })

        # COCO: write result
        file_id = dbg_image_grp + '.coco.json'
        LOG.info('Writing COCO result file "%s" in "%s"', file_id,
                 dbg_image_grp)
        self.workspace.add_file(ID=file_id,
                                file_grp=dbg_image_grp,
                                local_filename=os.path.join(
                                    dbg_image_grp, file_id),
                                mimetype='application/json',
                                content=json.dumps({
                                    'categories': categories,
                                    'images': images,
                                    'annotations': annotations
                                }))

        # write inverse colordict (for ocrd-segment-from-masks)
        file_id = dbg_image_grp + '.colordict.json'
        LOG.info('Writing colordict file "%s" in .', file_id)
        with open(file_id, 'w') as out:
            json.dump(
                dict(('#' + col, name) for name, col in CLASSES.items()
                     if name), out)
Example #5
0
    def process(self):
        """Extract page images and region descriptions (type and coordinates) from the workspace.

        Open and deserialize PAGE input files and their respective images,
        then iterate over the element hierarchy down to the region level.

        Get all regions with their types (region element class), sub-types (@type)
        and coordinates relative to the page (which depending on the workflow could
        already be cropped, deskewed, dewarped, binarized etc). Extract the image of
        the (cropped, deskewed, dewarped) page, both in binarized form (if available)
        and raw form. For the latter, apply ``feature_filter`` (a comma-separated list
        of image features, cf. :py:func:`ocrd.workspace.Workspace.image_from_page`)
        to skip specific features when retrieving derived images. If ``transparency``
        is true, then also add an alpha channel which is fully transparent outside of
        the mask.

        In addition, create a new (third) image with masks for each segment type in
        ``plot_segmasks``, color-coded by class according to ``colordict``.

        Create two JSON files with region types and coordinates: one (page-wise) in
        our custom format and one (global) in MS-COCO. Both describe the regions
        only (never lines, words or glyphs); regions for which no polygon could
        be obtained are left out.

        The output file group may be given as a comma-separated list to separate
        these 3 kinds of images. If fewer than 3 fileGrps are specified, they will
        share the same fileGrp (and directory). In particular, write files as follows:
        * in the first (or only) output file group (directory):
          - ID + '.png': raw image of the page (preprocessed, but with ``feature_filter``)
          - ID + '.json': region coordinates/classes (custom format)
        * in the second (or only) output file group (directory):
          - ID + '.bin.png': binarized image of the (preprocessed) page, if available
        * in the third (or first) output file group (directory):
          - ID + '.pseg.png': mask image of page; contents depend on ``plot_segmasks``:
            1. if it contains `page`, fill page frame,
            2. if it contains `region`, fill region segmentation/classification,
            3. if it contains `line`, fill text line segmentation,
            4. if it contains `word`, fill word segmentation,
            5. if it contains `glyph`, fill glyph segmentation,
            6. if it contains `order`, plot the reading order of the regions,
            where each follow-up layer and segment draws over the previous state, starting
            with a blank (white) image - unless ``plot_overlay`` is true, in which case
            each layer and segment is superimposed (alpha blended) onto the previous one,
            starting with the above raw image.

        In addition, write files for all pages at once:
        * in the third (or first) output file group (directory):
          - output_file_grp + '.coco.json': region coordinates/classes (MS-COCO format)
        * in the current working directory:
          - output_file_grp + '.colordict.json': the used ``colordict``

        (This is intended for training and evaluation of region segmentation models.)

        Raises:
            Exception: if more than 3 output file groups are given.
        """
        LOG = getLogger('processor.ExtractPages')
        assert_file_grp_cardinality(self.input_file_grp, 1)
        file_groups = self.output_file_grp.split(',')
        if len(file_groups) > 3:
            raise Exception(
                "at most 3 output file grps allowed (raw, [binarized, [mask]] image)"
            )
        if len(file_groups) > 2:
            mask_image_grp = file_groups[2]
        else:
            mask_image_grp = file_groups[0]
            LOG.info(
                "No output file group for mask images specified, falling back to output filegrp '%s'",
                mask_image_grp)
        if len(file_groups) > 1:
            bin_image_grp = file_groups[1]
        else:
            bin_image_grp = file_groups[0]
            LOG.info(
                "No output file group for binarized images specified, falling back to output filegrp '%s'",
                bin_image_grp)
        self.output_file_grp = file_groups[0]
        classes = self.parameter['colordict']
        plot_segmasks = self.parameter['plot_segmasks']
        plot_overlay = self.parameter['plot_overlay']

        # COCO: init data structures
        images = list()
        annotations = list()
        categories = list()
        i = 0
        for cat, color in classes.items():
            # COCO format does not allow alpha channel
            color = (int(color[0:2], 16), int(color[2:4],
                                              16), int(color[4:6], 16))
            try:
                supercat, name = cat.split(':')
            except ValueError:
                name = cat
                supercat = ''
            categories.append({
                'id': i,
                'name': name,
                'supercategory': supercat,
                'source': 'PAGE',
                'color': color
            })
            i += 1

        # running annotation ID (across all pages)
        i = 0
        # pylint: disable=attribute-defined-outside-init
        for n, input_file in enumerate(self.input_files):
            page_id = input_file.pageId or input_file.ID
            try:
                # separate non-numeric part of page ID to retain the numeric part
                num_page_id = int(page_id.strip(page_id.strip("0123456789")))
            except Exception:
                num_page_id = n
            LOG.info("INPUT FILE %i / %s", n, page_id)
            pcgts = page_from_file(self.workspace.download_file(input_file))
            self.add_metadata(pcgts)
            page = pcgts.get_Page()
            ptype = page.get_type()
            page_image, page_coords, page_image_info = self.workspace.image_from_page(
                page,
                page_id,
                feature_filter=self.parameter['feature_filter'],
                transparency=self.parameter['transparency'])
            if page_image_info.resolution != 1:
                dpi = page_image_info.resolution
                if page_image_info.resolutionUnit == 'cm':
                    dpi = round(dpi * 2.54)
            else:
                dpi = None
            file_id = make_file_id(input_file, self.output_file_grp)
            file_path = self.workspace.save_image_file(
                page_image,
                file_id,
                self.output_file_grp,
                page_id=page_id,
                mimetype=self.parameter['mimetype'])
            try:
                page_image_bin, _, _ = self.workspace.image_from_page(
                    page,
                    page_id,
                    feature_selector='binarized',
                    transparency=self.parameter['transparency'])
                self.workspace.save_image_file(page_image_bin,
                                               file_id + '.bin',
                                               bin_image_grp,
                                               page_id=page_id)
            except Exception as err:
                # only the "no binarized image" case is tolerated; check the
                # message defensively so that exceptions without a string
                # argument are re-raised as they are
                if (err.args and isinstance(err.args[0], str)
                        and err.args[0].startswith('Found no AlternativeImage')):
                    LOG.warning(
                        'Page "%s" has no binarized images, skipping .bin',
                        page_id)
                else:
                    raise
            # init multi-level mask output
            if plot_overlay:
                page_image_segmask = page_image.convert('RGBA')
            else:
                page_image_segmask = Image.new(mode='RGBA',
                                               size=page_image.size,
                                               color='#FFFFFF00')
            neighbors = dict()
            for level in ['page', 'region', 'line', 'word', 'glyph']:
                neighbors[level] = list()
            # produce border mask plot, if necessary
            if page.get_Border():
                border_poly = segment_poly(page_id, page.get_Border(),
                                           page_coords)
            else:
                border_poly = Polygon(
                    polygon_from_bbox(0, 0, page_image.width,
                                      page_image.height))
            if 'page' in plot_segmasks:
                plot_segment(page_id, page.get_Border(), border_poly, 'Border',
                             classes, page_image_segmask, [],
                             plot_overlay)
            # get regions and aggregate masks on all hierarchy levels
            description = {'angle': page.get_orientation()}
            regions = dict()
            for name in classes.keys():
                if not name or not name.endswith('Region'):
                    # no region subtypes or non-region types here
                    continue
                regions[name] = page.get_AllRegions(classes=name[:-6],
                                                    order='reading-order')
            for rtype, rlist in regions.items():
                for region in rlist:
                    if rtype in ['TextRegion', 'ChartRegion', 'GraphicRegion']:
                        subrtype = region.get_type()
                    else:
                        subrtype = None
                    if subrtype:
                        rtype0 = rtype + ':' + subrtype
                    else:
                        rtype0 = rtype
                    # keep the region's own polygon under a name of its own:
                    # the sub-levels below must not overwrite it, because the
                    # JSON/COCO entries further down describe the region
                    poly = segment_poly(page_id, region, page_coords)
                    # produce region mask plot, if necessary
                    if 'region' in plot_segmasks:
                        plot_segment(page_id, region, poly, rtype0, classes,
                                     page_image_segmask, neighbors['region'],
                                     plot_overlay)
                    if rtype == 'TextRegion':
                        for line in region.get_TextLine():
                            # produce line mask plot, if necessary
                            line_poly = segment_poly(page_id, line,
                                                     page_coords)
                            if 'line' in plot_segmasks:
                                plot_segment(page_id, line, line_poly,
                                             'TextLine', classes,
                                             page_image_segmask,
                                             neighbors['line'], plot_overlay)
                            for word in line.get_Word():
                                # produce word mask plot, if necessary
                                word_poly = segment_poly(
                                    page_id, word, page_coords)
                                if 'word' in plot_segmasks:
                                    plot_segment(page_id, word, word_poly,
                                                 'Word', classes,
                                                 page_image_segmask,
                                                 neighbors['word'],
                                                 plot_overlay)
                                for glyph in word.get_Glyph():
                                    # produce glyph mask plot, if necessary
                                    glyph_poly = segment_poly(
                                        page_id, glyph, page_coords)
                                    if 'glyph' in plot_segmasks:
                                        plot_segment(page_id, glyph,
                                                     glyph_poly, 'Glyph',
                                                     classes,
                                                     page_image_segmask,
                                                     neighbors['glyph'],
                                                     plot_overlay)
                    if not poly:
                        continue
                    # integer outline without the closing point; builtin int
                    # replaces the removed np.int alias, and .coords avoids
                    # relying on the geometry's array interface
                    exterior = np.array(poly.exterior.coords, int)[:-1]
                    polygon = exterior.tolist()
                    xywh = xywh_from_polygon(polygon)
                    area = poly.area
                    description.setdefault('regions', []).append({
                        'type': rtype,
                        'subtype': subrtype,
                        'coords': polygon,
                        'area': area,
                        'features': page_coords['features'],
                        'DPI': dpi,
                        'region.ID': region.id,
                        'page.ID': page_id,
                        'page.type': ptype,
                        'file_grp': self.input_file_grp,
                        'METS.UID': self.workspace.mets.unique_identifier
                    })
                    # COCO: add annotations
                    i += 1
                    annotations.append({
                        'id': i,
                        'image_id': num_page_id,
                        # prefer the category of the subtype, otherwise
                        # use the category of the region type itself
                        'category_id': next(
                            (cat['id']
                             for cat in categories if cat['name'] == subrtype),
                            next((cat['id'] for cat in categories
                                  if cat['name'] == rtype))),
                        'segmentation': exterior.reshape(1, -1).tolist(),
                        'area': area,
                        'bbox': [xywh['x'], xywh['y'], xywh['w'], xywh['h']],
                        'iscrowd': 0
                    })

            if 'order' in plot_segmasks:
                plot_order(page.get_ReadingOrder(), classes,
                           page_image_segmask, neighbors['region'],
                           plot_overlay)
            if plot_segmasks:
                self.workspace.save_image_file(
                    page_image_segmask,
                    file_id + '.pseg',
                    mask_image_grp,
                    page_id=page_id,
                    mimetype=self.parameter['mimetype'])
            self.workspace.add_file(
                ID=file_id + '.json',
                file_grp=mask_image_grp,
                pageId=input_file.pageId,
                local_filename=file_path.replace(
                    MIME_TO_EXT[self.parameter['mimetype']], '.json'),
                mimetype='application/json',
                content=json.dumps(description))

            # COCO: add image
            images.append({
                # COCO does not allow string identifiers:
                # -> use numerical part of page_id
                'id': num_page_id,
                # all exported coordinates are relative to the cropped page:
                # -> use that for reference (instead of original page.imageFilename)
                'file_name': file_path,
                # -> use its size (instead of original page.imageWidth/page.imageHeight)
                'width': page_image.width,
                'height': page_image.height
            })

        # COCO: write result
        file_id = mask_image_grp + '.coco.json'
        LOG.info('Writing COCO result file "%s" in "%s"', file_id,
                 mask_image_grp)
        self.workspace.add_file(ID=file_id,
                                file_grp=mask_image_grp,
                                local_filename=os.path.join(
                                    mask_image_grp, file_id),
                                mimetype='application/json',
                                pageId=None,
                                content=json.dumps({
                                    'categories': categories,
                                    'images': images,
                                    'annotations': annotations
                                }))

        # write inverse colordict (for ocrd-segment-from-masks)
        file_id = mask_image_grp + '.colordict.json'
        LOG.info('Writing colordict file "%s" in .', file_id)
        with open(file_id, 'w') as out:
            json.dump(
                dict((col, name) for name, col in classes.items() if name),
                out)
Example #6
0
def test_deskewing(plain_workspace):
    """Check cropping and deskewing of a non-rectangular region.

    A polygon mask is saved as the page image and annotated as a text
    region with ``orientation=-skew``. ``image_from_segment`` is then
    called three times:

    1. with ``feature_filter='deskewed'``: the result must equal a plain
       crop of the page image to the polygon's bounding box;
    2. without filter: the result must be deskewed (larger than the
       bounding box, rotation terms in the transform) while keeping
       roughly the same number of foreground pixels;
    3. after storing result 2 as an ``AlternativeImage`` of the region:
       the call must log nothing on ``ocrd_utils.crop_image`` and yield
       an image and coordinates matching result 2.
    """
    #from ocrd_utils import initLogging, setOverrideLogLevel
    #setOverrideLogLevel('DEBUG')
    size = (3000, 4000)
    poly = [[1403, 2573], [1560, 2573], [1560, 2598], [2311, 2598],
            [2311, 2757], [2220, 2757], [2220, 2798], [2311, 2798],
            [2311, 2908], [1403, 2908]]
    xywh = xywh_from_polygon(poly)
    bbox = bbox_from_polygon(poly)
    skew = 4.625
    image = Image.new('L', size)
    image = polygon_mask(image, poly)
    #image.show(title='image')
    pixels = np.count_nonzero(np.array(image) > 0)
    name = 'foo0'
    assert plain_workspace.save_image_file(image, name, 'IMG')
    pcgts = page_from_file(next(plain_workspace.mets.find_files(ID=name)))
    page = pcgts.get_Page()
    region = TextRegionType(
        id='nonrect',
        Coords=CoordsType(points=points_from_polygon(poly)),
        orientation=-skew)
    page.add_TextRegion(region)
    page_image, page_coords, _ = plain_workspace.image_from_page(page, '')
    #page_image.show(title='page_image')
    assert list(image.getdata()) == list(page_image.getdata())
    assert np.all(page_coords['transform'] == np.eye(3))
    # first without deskewing (plain crop to the bounding box)
    reg_image, reg_coords = plain_workspace.image_from_segment(
        region, page_image, page_coords, feature_filter='deskewed', fill=0)
    assert list(image.crop(bbox).getdata()) == list(reg_image.getdata())
    assert reg_image.width == xywh['w'] == 908
    assert reg_image.height == xywh['h'] == 335
    assert reg_coords['transform'][0, 2] == -xywh['x']
    assert reg_coords['transform'][1, 2] == -xywh['y']
    # same fg after cropping to minimal bbox
    reg_pixels = np.count_nonzero(np.array(reg_image) > 0)
    assert pixels == reg_pixels
    # now with deskewing (test for size after recropping)
    reg_image, reg_coords = plain_workspace.image_from_segment(region,
                                                               page_image,
                                                               page_coords,
                                                               fill=0)
    #reg_image.show(title='reg_image')
    assert reg_image.width == 932 > xywh['w']
    assert reg_image.height == 382 > xywh['h']
    assert reg_coords['transform'][0, 1] != 0
    assert reg_coords['transform'][1, 0] != 0
    assert 'deskewed' in reg_coords['features']
    # same fg after cropping to minimal bbox (roughly - due to aliasing)
    reg_pixels = np.count_nonzero(np.array(reg_image) > 0)
    assert np.abs(pixels - reg_pixels) / pixels < 0.005
    reg_array = np.array(reg_image) > 0
    # now via AlternativeImage
    path = plain_workspace.save_image_file(reg_image, region.id + '_img',
                                           'IMG')
    region.add_AlternativeImage(
        AlternativeImageType(filename=path, comments=reg_coords['features']))
    logger_capture = FIFOIO(256)
    logger_handler = logging.StreamHandler(logger_capture)
    #logger_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT))
    logger = logging.getLogger('ocrd_utils.crop_image')
    logger.addHandler(logger_handler)
    try:
        reg_image2, reg_coords2 = plain_workspace.image_from_segment(
            region, page_image, page_coords, fill=0)
        #reg_image2.show(title='reg_image2')
        logger_output = logger_capture.getvalue()
    finally:
        # detach before closing, so that later records on this logger
        # (from other tests) are not written to a closed stream
        logger.removeHandler(logger_handler)
        logger_capture.close()
    assert logger_output == ''
    assert reg_image2.width == reg_image.width
    assert reg_image2.height == reg_image.height
    assert np.allclose(reg_coords2['transform'], reg_coords['transform'])
    assert reg_coords2['features'] == reg_coords['features']
    # same fg after cropping to minimal bbox (roughly - due to aliasing);
    # must be measured on the image retrieved via AlternativeImage
    reg_pixels2 = np.count_nonzero(np.array(reg_image2) > 0)
    assert np.abs(reg_pixels2 - reg_pixels) / reg_pixels < 0.005
    reg_array2 = np.array(reg_image2) > 0
    assert 0.98 < np.sum(reg_array == reg_array2) / reg_array.size <= 1.0
Example #7
0
 def sanitize_page(self, page, page_id):
     """Replace the outline of each text region by the hull of its text lines.

     For every region returned by ``page.get_AllRegions(classes=['Text'])``:

     1. rasterise all text line polygons (interior and perimeter) into a
        binary mask of page image size;
     2. close the mask vertically with a column of height ``scale``
        (the largest line height, or the largest gap between
        consecutive lines if that is larger), so that the lines merge;
     3. grow the mask by one pixel in each direction;
     4. extract the external contours, ignoring those which cover less
        than 10% of the total contour area or simplify to fewer than
        4 points.

     If exactly one contour remains, it is converted back to absolute
     coordinates and written to the region's ``Coords``. Regions without
     text lines, with zero contour area, or with more than one remaining
     contour are left unchanged (and reported via the logger).

     Args:
         page: page object to process; its text regions are modified
             in place.
         page_id: page identifier, passed to ``image_from_page`` and
             used in log messages.
     """
     LOG = getLogger('processor.RepairSegmentation')
     regions = page.get_AllRegions(classes=['Text'])
     page_image, page_coords, _ = self.workspace.image_from_page(
         page, page_id)
     for region in regions:
         LOG.info('Sanitizing region "%s"', region.id)
         lines = region.get_TextLine()
         if not lines:
             LOG.warning('Page "%s" region "%s" contains no textlines',
                         page_id, region.id)
             continue
         heights = []
         tops = []
         # get labels:
         region_mask = np.zeros((page_image.height, page_image.width),
                                dtype=np.uint8)
         for line in lines:
             line_polygon = coordinates_of_segment(line, page_image,
                                                   page_coords)
             line_xywh = xywh_from_polygon(line_polygon)
             heights.append(line_xywh['h'])
             tops.append(line_xywh['y'])
             region_mask[draw.polygon(line_polygon[:, 1],
                                      line_polygon[:, 0],
                                      region_mask.shape)] = 1
             region_mask[draw.polygon_perimeter(line_polygon[:, 1],
                                                line_polygon[:, 0],
                                                region_mask.shape)] = 1
         # estimate scale:
         heights = np.array(heights)
         scale = int(np.max(heights))
         tops = np.array(tops)
         order = np.argsort(tops)  # top-to-bottom order of the lines
         heights = heights[order]
         tops = tops[order]
         if len(lines) > 1:
             # if interline spacing is larger than line height, use this
             bottoms = tops + heights
             deltas = tops[1:] - bottoms[:-1]
             scale = max(scale, int(np.max(deltas)))
         # Degenerate (zero-height) lines would give scale == 0, which
         # makes the structuring element below empty and turns the
         # [scale:-scale] slice into [0:0] (i.e. an empty mask).
         scale = max(scale, 1)
         # close labels:
         region_mask = np.pad(region_mask, scale)  # protect edges
         region_mask = np.array(morphology.binary_closing(
             region_mask, np.ones((scale, 1))),
                                dtype=np.uint8)
         region_mask = region_mask[scale:-scale, scale:-scale]  # unprotect
         # extend margins (to ensure simplified hull polygon is outside children):
         region_mask = filters.maximum_filter(region_mask,
                                              3)  # 1px in each direction
         # find outer contour (parts):
         contours, _ = cv2.findContours(region_mask, cv2.RETR_EXTERNAL,
                                        cv2.CHAIN_APPROX_SIMPLE)
         # determine areas of parts:
         areas = [cv2.contourArea(contour) for contour in contours]
         total_area = sum(areas)
         if not total_area:
             # ignore if too small
             LOG.warning('Zero contour area in region "%s"', region.id)
             continue
         # pick contour and convert to absolute:
         region_polygon = None
         for i, (contour, area) in enumerate(zip(contours, areas)):
             if area / total_area < 0.1:
                 LOG.warning(
                     'Ignoring contour %d too small (%d/%d) in region "%s"',
                     i, area, total_area, region.id)
                 continue
             # simplify shape (until valid):
             # can produce invalid (self-intersecting) polygons:
             #polygon = cv2.approxPolyDP(contour, 2, False)[:, 0, ::] # already ordered x,y
             polygon = contour[:, 0, ::]  # already ordered x,y
             polygon = Polygon(polygon).simplify(1)
             polygon = make_valid(polygon)
             polygon = polygon.exterior.coords[:-1]  # keep open
             if len(polygon) < 4:
                 LOG.warning(
                     'Ignoring contour %d less than 4 points in region "%s"',
                     i, region.id)
                 continue
             if region_polygon is not None:
                 # a second usable contour means the lines do not form
                 # one contiguous shape: give up on this region
                 LOG.error(
                     'Skipping region "%s" due to non-contiguous contours',
                     region.id)
                 region_polygon = None
                 break
             region_polygon = coordinates_for_segment(
                 polygon, page_image, page_coords)
         if region_polygon is not None:
             LOG.info('Using new coordinates for region "%s"', region.id)
             region.get_Coords().set_points(
                 points_from_polygon(region_polygon))