def test_membername(self):
    """Check that ``membername`` finds the attribute name holding a value."""

    class Holder:
        """Minimal object with a single instance attribute."""

        def __init__(self):
            self.prop = 42

    # look the value 42 up on a fresh instance; the only match is ``prop``
    found = membername(Holder(), 42)
    self.assertEqual(found, 'prop')
def _process_region(self, it, region, rogroup, region_image, region_coords):
    """Add a text sub-region (cell) to ``region`` for each text block of ``it``.

    Iterate over the blocks of the layout iterator ``it``. For every block
    whose ``BlockType()`` is one of the text types, create a ``TextRegionType``
    from the block's bounding box (converted via ``coordinates_for_segment``
    with ``region_image`` / ``region_coords``), add it to ``region``, and, if
    ``rogroup`` is given, reference it there with the next free index.
    Blocks of any other type are skipped.

    Args:
        it: layout iterator (queried via ``Empty``, ``BoundingBox``,
            ``BlockType`` and ``Next`` at ``RIL.BLOCK`` level); may be falsy.
        region: parent region receiving the new text regions.
        rogroup: reading order group to extend, or a falsy value.
        region_image: image passed on to ``coordinates_for_segment``.
        region_coords: coordinate record passed on to
            ``coordinates_for_segment``.
    """
    LOG = getLogger('processor.TesserocrSegmentTable')
    # equivalent to GetComponentImages with raw_image=True,
    # (which would also give raw coordinates),
    # except we are also interested in the iterator's BlockType() here
    index = 0
    if rogroup:
        # continue counting after the largest index already in the group
        for elem in (rogroup.get_RegionRefIndexed() +
                     rogroup.get_OrderedGroupIndexed() +
                     rogroup.get_UnorderedGroupIndexed()):
            if elem.index >= index:
                index = elem.index + 1
    while it and not it.Empty(RIL.BLOCK):
        bbox = it.BoundingBox(RIL.BLOCK)
        polygon = polygon_from_x0y0x1y1(bbox)
        polygon = coordinates_for_segment(polygon, region_image, region_coords)
        points = points_from_polygon(polygon)
        coords = CoordsType(points=points)
        ID = region.id + "_%04d" % index
        # The keyword is ``type_`` (as used elsewhere for this constructor);
        # ``type=`` would not set the paragraph default.
        subregion = TextRegionType(id=ID, Coords=coords,
                                   type_=TextTypeSimpleType.PARAGRAPH)
        block_type = it.BlockType()
        if block_type == PT.FLOWING_TEXT:
            pass  # keep the paragraph default
        elif block_type == PT.HEADING_TEXT:
            subregion.set_type(TextTypeSimpleType.HEADING)
        elif block_type == PT.PULLOUT_TEXT:
            subregion.set_type(TextTypeSimpleType.FLOATING)
        elif block_type == PT.CAPTION_TEXT:
            subregion.set_type(TextTypeSimpleType.CAPTION)
        elif block_type == PT.VERTICAL_TEXT:
            subregion.set_orientation(90.0)
        else:
            # ignore non-text blocks entirely (no region, no reading order
            # reference, and the index is not consumed)
            it.Next(RIL.BLOCK)
            continue
        LOG.info("Detected cell '%s': %s (%s)",
                 ID, points, membername(PT, block_type))
        region.add_TextRegion(subregion)
        if rogroup:
            # add the region reference in the reading order element
            rogroup.add_RegionRefIndexed(
                RegionRefIndexedType(regionRef=ID, index=index))
        #
        # iterator increment
        #
        index += 1
        it.Next(RIL.BLOCK)
def _process_segment(self, tessapi, segment, image, xywh, where, page_id, file_id):
    """Detect orientation, script and skew of ``image`` and annotate ``segment``.

    Runs ``tessapi.DetectOrientationScript()`` and ``tessapi.AnalyseLayout()``
    on ``image``. Depending on the confidences, the OSD orientation is used as
    the rotation angle and the OSD script is annotated as primary script.
    Reading direction and text line order are annotated from layout analysis,
    whose deskew angle is added to the rotation angle. The resulting
    orientation is set on ``segment``, the image is re-derived through the
    workspace, saved under ``file_id + '.IMG-DESKEW'`` and referenced as
    an AlternativeImage.

    Args:
        tessapi: Tesseract API object (``SetImage``,
            ``DetectOrientationScript``, ``AnalyseLayout``).
        segment: page or region object to annotate.
        image: image of the segment; must expose ``width`` and ``height``.
        xywh: coordinate record for ``image``; ``xywh['angle']`` is read.
        where: human-readable description of the segment for log messages.
        page_id: page identifier passed to the workspace.
        file_id: base identifier for the saved image file.

    Returns:
        None. Returns early (without annotation) if the image has zero size
        or if layout analysis yields no result.

    Raises:
        AssertionError: if an OSD confidence value is NaN.
    """
    LOG = getLogger('processor.TesserocrDeskew')
    if not image.width or not image.height:
        LOG.warning("Skipping %s with zero size", where)
        return
    angle0 = xywh['angle']  # deskewing (w.r.t. top image) already applied to image
    angle = 0.  # additional angle to be applied at current level
    tessapi.SetImage(image)
    #
    # orientation/script
    #
    osr = tessapi.DetectOrientationScript()
    if osr:
        assert not math.isnan(osr['orient_conf']), \
            "orientation detection failed (Tesseract probably compiled without legacy OEM, or osd model not installed)"
        if osr['orient_conf'] < self.parameter['min_orientation_confidence']:
            LOG.info(
                'ignoring OSD orientation result %d° clockwise due to low confidence %.0f in %s',
                osr['orient_deg'], osr['orient_conf'], where)
        else:
            LOG.info(
                'applying OSD orientation result %d° clockwise with high confidence %.0f in %s',
                osr['orient_deg'], osr['orient_conf'], where)
            # defined as 'the detected clockwise rotation of the input image'
            # i.e. the same amount to be applied counter-clockwise for deskewing:
            angle = osr['orient_deg']
        assert not math.isnan(osr['script_conf']), \
            "script detection failed (Tesseract probably compiled without legacy OEM, or osd model not installed)"
        if osr['script_conf'] < 10:
            LOG.info(
                'ignoring OSD script result "%s" due to low confidence %.0f in %s',
                osr['script_name'], osr['script_conf'], where)
        else:
            LOG.info(
                'applying OSD script result "%s" with high confidence %.0f in %s',
                osr['script_name'], osr['script_conf'], where)
            if isinstance(segment, (TextRegionType, PageType)):
                # map the OSD script name to the PAGE script annotation
                # (unknown names fall back to Latin)
                script_names = {
                    "Arabic": "Arab - Arabic",
                    "Armenian": "Armn - Armenian",
                    "Bengali": "Beng - Bengali",
                    "Canadian_Aboriginal": "Cans - Unified Canadian Aboriginal Syllabics",
                    "Cherokee": "Cher - Cherokee",
                    "Common": "Latn - Latin",  # not in scripts/
                    "Cyrillic": "Cyrl - Cyrillic",
                    "Devanagari": "Deva - Devanagari (Nagari)",
                    "Ethiopic": "Ethi - Ethiopic",
                    "Fraktur": "Latf - Latin (Fraktur variant)",
                    "Georgian": "Geor - Georgian (Mkhedruli)",
                    "Greek": "Grek - Greek",
                    "Gujarati": "Gujr - Gujarati",
                    "Gurmukhi": "Guru - Gurmukhi",
                    "Han": "Hant - Han (Traditional variant)",  # not in scripts/
                    "Hangul": "Hang - Hangul",
                    "Hangul_vert": "Hang - Hangul",
                    "HanS": "Hans - Han (Simplified variant)",
                    "HanS_vert": "Hans - Han (Simplified variant)",
                    "HanT": "Hant - Han (Traditional variant)",
                    "HanT_vert": "Hant - Han (Traditional variant)",
                    "Hebrew": "Hebr - Hebrew",
                    "Hiragana": "Jpan - Japanese",  # not in scripts/
                    "Japanese": "Jpan - Japanese",
                    "Japanese_vert": "Jpan - Japanese",
                    "Kannada": "Knda - Kannada",
                    "Katakana": "Jpan - Japanese",  # not in scripts/
                    "Khmer": "Khmr - Khmer",
                    "Lao": "Laoo - Lao",
                    "Latin": "Latn - Latin",
                    "Malayalam": "Mlym - Malayalam",
                    "Myanmar": "Mymr - Myanmar (Burmese)",
                    "Oriya": "Orya - Oriya",
                    "Sinhala": "Sinh - Sinhala",
                    "Syriac": "Syrc - Syriac",
                    "Tamil": "Taml - Tamil",
                    "Telugu": "Telu - Telugu",
                    "Thaana": "Thaa - Thaana",
                    "Thai": "Thai - Thai",
                    "Tibetan": "Tibt - Tibetan",
                    # NOTE(review): Tesseract's Vietnamese model presumably
                    # covers Latin-script Vietnamese rather than Tai Viet;
                    # confirm before changing this mapping.
                    "Vietnamese": "Tavt - Tai Viet",
                }
                segment.set_primaryScript(
                    script_names.get(osr['script_name'], "Latn - Latin"))
    else:
        LOG.warning('no OSD result in %s', where)
    #
    # orientation/skew
    #
    layout = tessapi.AnalyseLayout()
    if not layout:
        LOG.warning('no result iterator in %s', where)
        return
    orientation, writing_direction, textline_order, deskew_angle = layout.Orientation()
    if isinstance(segment, (TextRegionType, PageType)):
        segment.set_readingDirection({
            WritingDirection.LEFT_TO_RIGHT: 'left-to-right',
            WritingDirection.RIGHT_TO_LEFT: 'right-to-left',
            WritingDirection.TOP_TO_BOTTOM: 'top-to-bottom'
        }.get(writing_direction, 'bottom-to-top'))
        segment.set_textLineOrder({
            TextlineOrder.LEFT_TO_RIGHT: 'left-to-right',
            TextlineOrder.RIGHT_TO_LEFT: 'right-to-left',
            TextlineOrder.TOP_TO_BOTTOM: 'top-to-bottom'
        }.get(textline_order, 'bottom-to-top'))
    # defined as 'how many radians does one have to rotate the block anti-clockwise'
    # i.e. positive amount to be applied counter-clockwise for deskewing:
    deskew_angle *= 180 / math.pi
    LOG.info('orientation/deskewing for %s: %s / %s / %s / %.3f°', where,
             membername(Orientation, orientation),
             membername(WritingDirection, writing_direction),
             membername(TextlineOrder, textline_order),
             deskew_angle)
    # defined as 'the amount of clockwise rotation to be applied to the input image'
    # i.e. the negative amount to be applied counter-clockwise for deskewing:
    # (as defined in Tesseract OrientationIdToValue):
    angle2 = {
        Orientation.PAGE_RIGHT: 90,
        Orientation.PAGE_DOWN: 180,
        Orientation.PAGE_LEFT: 270
    }.get(orientation, 0)
    if angle2 != angle:
        # This effectively ignores Orientation from AnalyseLayout,
        # because it is usually wrong when it deviates from OSD results.
        # (We do keep deskew_angle, though - see below.)
        LOG.warning(
            'inconsistent angles from layout analysis (%d) and orientation detection (%d) in %s',
            angle2, angle, where)
    # annotate result:
    angle += deskew_angle
    # page angle: PAGE @orientation is defined clockwise,
    # whereas PIL/ndimage rotation is in mathematical direction:
    orientation = -(angle + angle0)
    orientation = 180 - (180 - orientation) % 360  # map to [-179.999,180]
    segment.set_orientation(orientation)  # also removes all deskewed AlternativeImages
    # Tesseract layout analysis already rotates the image, even for each
    # sub-segment (depending on RIL), but the accuracy is not as good
    # as setting the image to the sub-segments and running without iterator.
    # (These images can be queried via GetBinaryImage/GetImage, cf. segment_region)
    # Unfortunately, it does _not_ use expand=True, but chops off corners.
    # So we must do it here from the original image ourselves.
    # We can delegate to OCR-D core for reflection, deskewing and re-cropping:
    if isinstance(segment, PageType):
        image, xywh, _ = self.workspace.image_from_page(
            segment, page_id,
            fill='background', transparency=True)
    else:
        image, xywh = self.workspace.image_from_segment(
            segment, image, xywh,
            fill='background', transparency=True)
    if not angle:
        # zero rotation does not change coordinates,
        # but assures consuming processors that the
        # workflow had deskewing
        xywh['features'] += ',deskewed'
    features = xywh['features']  # features already applied to image
    # update METS (add the image file):
    file_path = self.workspace.save_image_file(
        image, file_id + '.IMG-DESKEW',
        page_id=page_id,
        file_grp=self.output_file_grp)
    # update PAGE (reference the image file):
    segment.add_AlternativeImage(
        AlternativeImageType(filename=file_path, comments=features))
def process(self):
    """Performs region segmentation by reading mask images in pseudo-colour.

    Open and deserialize each PAGE input file (or generate from image input file)
    from the first input file group, as well as mask image file from the second.

    Then iterate over all connected (equally colored) mask segments and compute
    convex hull contours for them. Convert them to polygons, and look up their
    color value in ``colordict`` to instantiate the appropriate region types
    (optionally with subtype). Instantiate and annotate regions accordingly.

    Produce a new output file by serialising the resulting hierarchy.

    Raises:
        Exception: if not exactly 2 input file groups are given, or if a
            class named in ``colordict`` has no matching ``<class>Type``
            in the module globals.
    """
    colordict = self.parameter['colordict']
    if not colordict:
        LOG.info('Using default PAGE colordict')
        colordict = {'#' + col: name
                     for name, col in CLASSES.items()
                     if name}
    # NOTE(review): the "ChartType" key can never equal a class name that
    # passes the ``classname + "Type"`` lookup below; presumably
    # "ChartRegion" was intended - confirm before changing.
    typedict = {"TextRegion": TextTypeSimpleType,
                "GraphicRegion": GraphicsTypeSimpleType,
                "ChartType": ChartTypeSimpleType}
    ifgs = self.input_file_grp.split(",")  # input file groups
    if len(ifgs) != 2:
        raise Exception("need 2 input file groups (base and mask)")
    # collect input file tuples
    ifts = self.zip_input_files(ifgs)  # input file tuples
    # process input file tuples
    for n, ift in enumerate(ifts):
        input_file, segmentation_file = ift
        LOG.info("processing page %s", input_file.pageId)
        pcgts = page_from_file(self.workspace.download_file(input_file))
        page = pcgts.get_Page()

        # add metadata about this operation and its runtime parameters:
        metadata = pcgts.get_Metadata()  # ensured by from_file()
        metadata.add_MetadataItem(
            MetadataItemType(type_="processingStep",
                             name=self.ocrd_tool['steps'][0],
                             value=TOOL,
                             Labels=[LabelsType(
                                 externalModel="ocrd-tool",
                                 externalId="parameters",
                                 Label=[LabelType(type_=name,
                                                  value=self.parameter[name])
                                        for name in self.parameter])]))

        # import mask image
        segmentation_filename = self.workspace.download_file(
            segmentation_file).local_filename
        with pushd_popd(self.workspace.directory):
            segmentation_pil = Image.open(segmentation_filename)
        has_alpha = segmentation_pil.mode == 'RGBA'
        if has_alpha:
            # keep all 4 channels: the 4-weight collapse and the 8-digit
            # color names below depend on the alpha channel being present
            colorformat = "#%08X"
        else:
            colorformat = "#%06X"
            if segmentation_pil.mode != 'RGB':
                segmentation_pil = segmentation_pil.convert('RGB')
        # convert to array
        segmentation_array = np.array(segmentation_pil)
        # collapse the color channels into a single integer per pixel
        weights = np.array([2**24, 2**16, 2**8, 1], np.uint32)
        segmentation_array = segmentation_array.dot(
            weights[0 if has_alpha else 1:])
        # partition mapped colors vs background
        colors = np.unique(segmentation_array)
        bgcolors = []
        for i, color in enumerate(colors):
            colorname = colorformat % color
            if colorname not in colordict or not colordict[colorname]:
                LOG.info("Ignoring background color %s", colorname)
                bgcolors.append(i)
        background = np.zeros_like(segmentation_array, np.uint8)
        if bgcolors:
            for i in bgcolors:
                background += np.array(segmentation_array == colors[i], np.uint8)
            colors = np.delete(colors, bgcolors, 0)
        # iterate over mask for each mapped color/class
        regionno = 0
        for color in colors:
            # get region (sub)type
            colorname = colorformat % color
            classname = colordict[colorname]
            regiontype = None
            custom = None
            if ":" in classname:
                # split only once, so a subtype containing ":" does not
                # break the unpacking
                classname, regiontype = classname.split(":", 1)
                if classname in typedict:
                    typename = membername(typedict[classname], regiontype)
                    if typename == regiontype:
                        # not predefined in PAGE: use other + custom
                        custom = "subtype:%s" % regiontype
                        regiontype = "other"
                else:
                    custom = "subtype:%s" % regiontype
            if classname + "Type" not in globals():
                raise Exception("Unknown class '%s' for color %s in colordict" % (classname, colorname))
            classtype = globals()[classname + "Type"]
            if classtype is BorderType:
                # mask from all non-background regions
                classmask = 1 - background
            else:
                # mask from current color/class
                classmask = np.array(segmentation_array == color, np.uint8)
            if not np.count_nonzero(classmask):
                continue
            # now get the contours and make polygons for them
            contours, _ = cv2.findContours(classmask, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)
            for contour in contours:
                # (could also just take bounding boxes to avoid islands/inclusions...)
                area = cv2.contourArea(contour)
                # filter too small regions
                area_pct = area / np.prod(segmentation_array.shape) * 100
                if area < 100 and area_pct < 0.1:
                    LOG.warning('ignoring contour of only %.1f%% area for %s',
                                area_pct, classname)
                    continue
                LOG.info('found region %s:%s:%s with area %.1f%%',
                         classname, regiontype or '', custom or '', area_pct)
                # simplify shape
                poly = cv2.approxPolyDP(contour, 2, False)[:, 0, ::]  # already ordered x,y
                if len(poly) < 4:
                    LOG.warning('ignoring contour of only %d points (area %.1f%%) for %s',
                                len(poly), area_pct, classname)
                    continue
                if classtype is BorderType:
                    # add Border (only one per page, so stop after the first)
                    page.set_Border(BorderType(Coords=CoordsType(points=points_from_polygon(poly))))
                    break
                # instantiate region
                regionno += 1
                region = classtype(id="region_%d" % regionno, type_=regiontype, custom=custom,
                                   Coords=CoordsType(points=points_from_polygon(poly)))
                # add region
                getattr(page, 'add_%s' % classname)(region)

        # Use input_file's basename for the new file -
        # this way the files retain the same basenames:
        file_id = input_file.ID.replace(ifgs[0], self.output_file_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.output_file_grp, n)
        self.workspace.add_file(
            ID=file_id,
            file_grp=self.output_file_grp,
            pageId=input_file.pageId,
            mimetype=MIMETYPE_PAGE,
            local_filename=os.path.join(self.output_file_grp,
                                        file_id + '.xml'),
            content=to_xml(pcgts))
def _process_page(self, it, page, page_image, page_coords, page_id):
    """Add a region to ``page`` for each block of the layout iterator ``it``.

    For every block, derive a polygon (the block polygon if the
    ``crop_polygons`` parameter is set, otherwise the padded bounding box),
    convert it via ``coordinates_for_segment`` and ``polygon_for_parent``,
    and add a region whose class depends on the block type: text, image,
    separator, maths, table or (for anything else) noise. All regions except
    separators and noise are also referenced in the page's top-level
    ordered group, which is created if missing and removed again if it
    ends up empty.

    Args:
        it: layout iterator (queried at ``RIL.BLOCK`` level); may be falsy.
        page: page object receiving the regions and the reading order.
        page_image: image passed on to ``coordinates_for_segment``.
        page_coords: coordinate record passed on to
            ``coordinates_for_segment``.
        page_id: page identifier (not used in this method).
    """
    LOG = getLogger('processor.TesserocrSegmentRegion')
    # equivalent to GetComponentImages with raw_image=True,
    # (which would also give raw coordinates),
    # except we are also interested in the iterator's BlockType() here,
    # and its BlockPolygon()
    index = 0
    ro = page.get_ReadingOrder()
    if not ro:
        ro = ReadingOrderType()
        page.set_ReadingOrder(ro)
    og = ro.get_OrderedGroup()
    if og:
        # start counting from largest existing index
        for elem in (og.get_RegionRefIndexed() +
                     og.get_OrderedGroupIndexed() +
                     og.get_UnorderedGroupIndexed()):
            if elem.index >= index:
                index = elem.index + 1
    else:
        # new top-level group
        og = OrderedGroupType(id="reading-order")
        ro.set_OrderedGroup(og)
    text_types = [
        PT.FLOWING_TEXT,
        PT.HEADING_TEXT,
        PT.PULLOUT_TEXT,
        PT.CAPTION_TEXT,
        # TABLE is contained in PTIsTextType, but
        # it is a bad idea to create a TextRegion
        # for it (better set `find_tables` False):
        # PT.TABLE,
        # will also get a 90° @orientation
        # (but that can be overridden by deskew/OSD):
        PT.VERTICAL_TEXT,
    ]
    while it and not it.Empty(RIL.BLOCK):
        # (padding will be passed to both BoundingBox and GetImage)
        # (actually, Tesseract honours padding only on the left and bottom,
        #  whereas right and top are increased less!)
        bbox = it.BoundingBox(RIL.BLOCK, padding=self.parameter['padding'])
        # sometimes these polygons are not planar, which causes
        # PIL.ImageDraw.Draw.polygon (and likely others as well)
        # to misbehave; however, PAGE coordinate semantics prohibit
        # multi-path polygons!
        # (probably a bug in Tesseract itself, cf. tesseract#2826):
        if self.parameter['crop_polygons']:
            polygon = it.BlockPolygon()
        else:
            polygon = polygon_from_x0y0x1y1(bbox)
        polygon = coordinates_for_segment(polygon, page_image, page_coords)
        polygon2 = polygon_for_parent(polygon, page)
        if polygon2 is not None:
            polygon = polygon2
        points = points_from_polygon(polygon)
        coords = CoordsType(points=points)
        if polygon2 is None:
            LOG.info('Ignoring extant region: %s', points)
            it.Next(RIL.BLOCK)
            continue
        ID = "region%04d" % index
        # separators and noise are not part of the reading order
        in_reading_order = True
        #
        # region type switch
        #
        block_type = it.BlockType()
        if block_type in text_types:
            # the keyword is ``type_`` (``type=`` would not set the default)
            region = TextRegionType(id=ID, Coords=coords,
                                    type_=TextTypeSimpleType.PARAGRAPH)
            if block_type == PT.VERTICAL_TEXT:
                region.set_orientation(90.0)
            elif block_type == PT.HEADING_TEXT:
                region.set_type(TextTypeSimpleType.HEADING)
            elif block_type == PT.PULLOUT_TEXT:
                region.set_type(TextTypeSimpleType.FLOATING)
            elif block_type == PT.CAPTION_TEXT:
                region.set_type(TextTypeSimpleType.CAPTION)
            page.add_TextRegion(region)
            if self.parameter['sparse_text']:
                region.set_type(TextTypeSimpleType.OTHER)
                region.add_TextLine(
                    TextLineType(id=region.id + '_line', Coords=coords))
        elif block_type in [PT.FLOWING_IMAGE,
                            PT.HEADING_IMAGE,
                            PT.PULLOUT_IMAGE]:
            region = ImageRegionType(id=ID, Coords=coords)
            page.add_ImageRegion(region)
        elif block_type in [PT.HORZ_LINE, PT.VERT_LINE]:
            region = SeparatorRegionType(id=ID, Coords=coords)
            page.add_SeparatorRegion(region)
            in_reading_order = False
        elif block_type in [PT.INLINE_EQUATION, PT.EQUATION]:
            region = MathsRegionType(id=ID, Coords=coords)
            page.add_MathsRegion(region)
        elif block_type == PT.TABLE:
            # without API access to StructuredTable we cannot
            # do much for a TableRegionType (i.e. nrows, ncols,
            # coordinates of cells for recursive regions etc),
            # but this can be achieved afterwards by segment-table
            region = TableRegionType(id=ID, Coords=coords)
            page.add_TableRegion(region)
        else:
            region = NoiseRegionType(id=ID, Coords=coords)
            # pass the region (a call without argument cannot add it)
            page.add_NoiseRegion(region)
            in_reading_order = False
        if in_reading_order:
            # add the region reference in the reading order element
            og.add_RegionRefIndexed(
                RegionRefIndexedType(regionRef=ID, index=index))
        LOG.info("Detected region '%s': %s (%s)",
                 ID, points, membername(PT, block_type))
        #
        # iterator increment
        #
        index += 1
        it.Next(RIL.BLOCK)
    if (not og.get_RegionRefIndexed() and
            not og.get_OrderedGroupIndexed() and
            not og.get_UnorderedGroupIndexed()):
        # schema forbids empty OrderedGroup
        ro.set_OrderedGroup(None)
def process(self):
    """Performs region segmentation by reading from COCO annotations.

    Open and deserialize the COCO JSON file from the second input file group.
    (It lists region categories/subtypes, file names and segmentations for all pages.)

    Open and deserialize each PAGE input file (or generate from image input file)
    from the first input file group. Now find this page in COCO:
    - try to match the PAGE ``imageFilename`` or METS file path matches to some
      COCO ``file_name``, otherwise
    - try to match the numeric part of the METS physical page ID to some
      COCO ``id``, otherwise
    - skip with an error.

    Then create and add a region for each ``segmentation``, converting its polygon
    to coordinate points and its COCO category to a region type (and subtype),
    either for a PubLayNet classification or PAGE classification (as produced by
    ocrd-segment-extract-pages), as indicated by ``source``.

    Produce a new output file by serialising the resulting hierarchy.

    Afterwards, if there are still COCO images left unaccounted for (i.e. without
    corresponding input files), then show a warning.

    Raises:
        Exception: if the COCO file cannot be found, or if a region category
            is unknown.
        AssertionError: if a segmentation is not a list (RLE/mask).
    """
    LOG = getLogger('processor.ImportCOCOSegmentation')
    # Load JSON
    assert_file_grp_cardinality(self.input_file_grp, 2, 'base and COCO')
    # pylint: disable=attribute-defined-outside-init
    self.input_file_grp, coco_grp = self.input_file_grp.split(',')
    if not self.input_files:
        LOG.warning('No input files to process')
        return
    if coco_grp in self.workspace.mets.file_groups:
        # the COCO file is the first file of the group without a page ID
        cocofile = next(
            (f for f in self.workspace.mets.find_files(fileGrp=coco_grp)
             if not f.pageId),
            None)
        if cocofile is None:
            raise Exception(
                "no non-page-specific file in second file group (COCO file)",
                coco_grp)
        cocofile = self.workspace.download_file(cocofile).local_filename
    elif os.path.isfile(coco_grp):
        cocofile = coco_grp
    else:
        raise Exception("file not found in second file group (COCO file)",
                        coco_grp)

    LOG.info('Loading COCO annotations from "%s" into memory...', cocofile)
    with open(cocofile, 'r', encoding='utf-8') as inp:
        coco = json.load(inp)
    LOG.info('Loaded JSON for %d images with %d regions in %d categories',
             len(coco['images']), len(coco['annotations']),
             len(coco['categories']))
    coco_source = 'PubLayNet'
    # Convert to usable dicts
    # classes:
    categories = dict()
    subcategories = dict()
    for cat in coco['categories']:
        # 'source' is optional: categories without it keep the default
        if cat.get('source') == 'PAGE':
            coco_source = 'PAGE'
        if 'supercategory' in cat and cat['supercategory']:
            categories[cat['id']] = cat['supercategory']
            subcategories[cat['id']] = cat['name']
        else:
            categories[cat['id']] = cat['name']
    # images and annotations:
    images_by_id = dict()
    images_by_filename = dict()
    for image in coco['images']:
        images_by_id[image['id']] = image
        images_by_filename[image['file_name']] = image
    for annotation in coco['annotations']:
        image = images_by_id[annotation['image_id']]
        image.setdefault('regions', list()).append(annotation)
    del coco

    # NOTE(review): the "ChartType" key can never equal a category that
    # passes the ``category + 'Type'`` lookup below; presumably
    # "ChartRegion" was intended - confirm before changing.
    typedict = {
        "TextRegion": TextTypeSimpleType,
        "GraphicRegion": GraphicsTypeSimpleType,
        "ChartType": ChartTypeSimpleType
    }

    LOG.info('Converting %s annotations into PAGE-XML', coco_source)
    for n, input_file in enumerate(self.input_files):
        page_id = input_file.pageId or input_file.ID
        try:
            # strip the non-numeric affixes off the page ID
            num_page_id = int(page_id.strip(page_id.strip("0123456789")))
        except ValueError:
            # no usable numeric part: only matching by file name is possible
            num_page_id = None
        LOG.info("INPUT FILE %i / %s", n, page_id)
        pcgts = page_from_file(self.workspace.download_file(input_file))
        self.add_metadata(pcgts)
        page = pcgts.get_Page()

        # find COCO image
        if page.imageFilename in images_by_filename:
            image = images_by_filename[page.imageFilename]
        elif num_page_id in images_by_id:
            image = images_by_id[num_page_id]
        else:
            LOG.error('Page "%s" / file "%s" not found in COCO',
                      page_id, page.imageFilename)
            # todo: maybe we should at least write the (unchanged) output PAGE?
            continue
        if image['width'] != page.imageWidth:
            LOG.error(
                'Page "%s" width %d does not match annotated width %d',
                page_id, page.imageWidth, image['width'])
        if image['height'] != page.imageHeight:
            LOG.error(
                'Page "%s" height %d does not match annotated height %d',
                page_id, page.imageHeight, image['height'])

        # todo: remove existing segmentation first?
        # (images without any annotation have no 'regions' entry)
        for region in image.get('regions', []):
            assert isinstance(region['segmentation'], list), \
                "importing RLE/mask segmentation not implemented"
            polygon = np.array(region['segmentation'])
            polygon = np.reshape(polygon, (polygon.shape[1] // 2, 2))
            coords = CoordsType(points=points_from_polygon(polygon))
            category = categories[region['category_id']]
            subcategory = subcategories.get(region['category_id'])
            region_id = 'r' + str(region['id'])
            LOG.info('Adding region %s:%s [area %d]',
                     category, subcategory or '', region['area'])
            if coco_source == 'PubLayNet':
                if category == 'text':
                    region_obj = TextRegionType(
                        id=region_id, Coords=coords,
                        type_=TextTypeSimpleType.PARAGRAPH)
                    page.add_TextRegion(region_obj)
                elif category == 'title':
                    region_obj = TextRegionType(
                        id=region_id, Coords=coords,
                        type_=TextTypeSimpleType.HEADING)  # CAPTION?
                    page.add_TextRegion(region_obj)
                elif category == 'list':
                    region_obj = TextRegionType(
                        id=region_id, Coords=coords,
                        type_=TextTypeSimpleType.LISTLABEL)  # OTHER?
                    page.add_TextRegion(region_obj)
                elif category == 'table':
                    region_obj = TableRegionType(id=region_id, Coords=coords)
                    page.add_TableRegion(region_obj)
                elif category == 'figure':
                    region_obj = ImageRegionType(id=region_id, Coords=coords)
                    page.add_ImageRegion(region_obj)
                else:
                    raise Exception('unknown region category: %s' % category)
            else:  # 'PAGE'
                args = {'id': region_id, 'Coords': coords}
                if subcategory:
                    if category in typedict:
                        subtype = membername(typedict[category], subcategory)
                        if subtype == subcategory:
                            # not predefined in PAGE: use other + custom
                            args['custom'] = "subtype:%s" % subcategory
                            args['type_'] = "other"
                        else:
                            args['type_'] = subcategory
                    else:
                        args['custom'] = "subtype:%s" % subcategory
                if category + 'Type' not in globals():
                    raise Exception('unknown region category: %s' % category)
                region_type = globals()[category + 'Type']
                if region_type is BorderType:
                    page.set_Border(BorderType(Coords=coords))
                else:
                    region_obj = region_type(**args)
                    getattr(page, 'add_%s' % category)(region_obj)
        # remove the matched image from both dicts (by its own keys, so the
        # final report is correct regardless of how the page was matched)
        images_by_id.pop(image['id'], None)
        images_by_filename.pop(image['file_name'], None)

        file_id = make_file_id(input_file, self.output_file_grp)
        self.workspace.add_file(
            ID=file_id,
            file_grp=self.output_file_grp,
            pageId=input_file.pageId,
            mimetype=MIMETYPE_PAGE,
            local_filename=os.path.join(self.output_file_grp,
                                        file_id + '.xml'),
            content=to_xml(pcgts))

    # warn of remaining COCO images
    if images_by_filename and not self.page_id:
        LOG.warning('%d images remain unaccounted for after processing',
                    len(images_by_filename))
        if LOG.isEnabledFor(logging.DEBUG):
            for filename in images_by_filename:
                LOG.debug('not found in workspace: "%s"', filename)