def __dump(*args, **kwargs):
    """Call the wrapped ``func`` and, if dumping is enabled, dump its result.

    When the ``argv:dump`` configuration flag is off, this is a plain
    pass-through to ``func``. Otherwise the enclosing ``dumper`` is resolved
    (a :class:`BaseDumper` subclass is instantiated, anything else is used
    as is), its optional ``__precall__``/``__postcall__`` hooks are invoked
    around ``func`` and finally the dumper itself is called with the result
    and a dedicated dump directory.

    Dumping is best-effort: every failure of the dumper or its hooks is
    logged and never propagated, so the value returned by ``func`` always
    reaches the caller. Exceptions raised by ``func`` itself are not caught.

    :return: whatever ``func(*args, **kwargs)`` returns
    """
    if not Config.instance().config('argv:dump', False).asbool():
        return func(*args, **kwargs)

    # A BaseDumper subclass gets a fresh instance per call; an instance or a
    # plain callable is used directly.
    if isinstance(dumper, type) and issubclass(dumper, BaseDumper):
        dumper_ = dumper()
    else:
        dumper_ = dumper
    has_hooks = isinstance(dumper_, BaseDumper)

    if has_hooks:
        try:
            dumper_.__precall__(args=args, kwargs=kwargs)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            log.exception('__precall__ of %s failed:', dumper_)

    result = func(*args, **kwargs)

    if has_hooks:
        try:
            dumper_.__postcall__(result, args=args, kwargs=kwargs)
        except Exception:
            log.exception('__postcall__ of %s failed:', dumper_)

    try:
        cfg = Config.instance()
        dump_dir = cfg.config(
            'main:dump_dir',
            os.path.join(Config.ROOT_DIR, 'data', 'dump')).asstring()
        infile = os.path.basename(cfg.config('argv:infile').asstring())
        # ``func_name`` only exists on Python 2 function objects; fall back
        # to ``__name__`` and then to the class name so that dumper
        # instances (and Python 3 functions) get a directory as well.
        dumper_name = (getattr(dumper_, 'func_name', None)
                       or getattr(dumper_, '__name__', None)
                       or dumper_.__class__.__name__)
        dump_dir = os.path.join(dump_dir, infile, dumper_name)
        if not os.path.isdir(dump_dir):
            os.makedirs(dump_dir)
        # Keep the effective configuration next to the dump output.
        cfg.save(os.path.join(dump_dir, 'config.ini'))
        log.debug('executing dump function: %s', dumper_)
        dumper_(result, args=args, kwargs=kwargs, dump_dir=dump_dir)
    except Exception:
        log.exception('error while performing dump using function %s:',
                      dumper_)
    return result
def config(self, key, default=None):
    """Look up the configuration parameter ``key`` of this plugin.

    The parameter is read from ``plugins:ocr:<class name>:<key>``. When
    ``default`` is falsy, the class attribute ``__ocr_<key>__`` (if it
    exists, otherwise ``None``) is used as the default instead.
    """
    plugin_class = self.__class__
    full_key = "plugins:ocr:%s:%s" % (plugin_class.__name__, key)
    fallback = default or getattr(plugin_class, "__ocr_%s__" % key, None)
    return Config.instance().config(full_key, fallback)
def __call__(self, result, args=None, kwargs=None, dump_dir=None):
    """Draw the bounds of the segments in ``self.text_used`` and save them.

    The bounds are painted on a copy of ``args[0].image`` using the
    ``argv:cbounds`` colour (default ``(0, 0, 255)``); the picture is
    written to ``y-axis-labels.png`` inside ``dump_dir``.
    """
    bounds_color = Config.instance().config(
        'argv:cbounds', (0, 0, 255)).value
    canvas = args[0].image.copy()

    # Collect the used text segments in a temporary group so that they can
    # be drawn with a single call.
    group = SegmentGroup(None)
    group.segments.update(self.text_used)
    group.display_bounds(canvas, color=bounds_color)

    target = os.path.join(dump_dir, 'y-axis-labels.png')
    canvas.save(target)
def _dump_vertical_bars_candidates(result, args=None, kwargs=None,
                                   dump_dir=None):
    """Save a picture with the bounds of every bar candidate in ``result``.

    For each item of ``result`` the bounds of its ``bar`` and then of its
    ``label`` are drawn on a copy of ``args[0]`` using the ``argv:cbounds``
    colour (default ``(0, 0, 255)``). The picture is written to
    ``bars-candidates.png`` inside ``dump_dir``.
    """
    bounds_color = Config.instance().config(
        'argv:cbounds', (0, 0, 255)).value
    canvas = args[0].copy()
    for candidate in result:
        # Bar first, label second.
        for part_name in ('bar', 'label'):
            part = getattr(candidate, part_name)
            part.display_bounds(canvas, color=bounds_color)
    target = os.path.join(dump_dir, 'bars-candidates.png')
    canvas.save(target)
def _text_recognition_dump(result, args=None, kwargs=None, dump_dir=None):
    """Dump intermediate pictures of the text recognition step.

    Data is taken from ``kwargs['storage']['TextRecognitor']`` and drawn
    over copies of ``args[1]``. Four files are written to ``dump_dir``:

    * ``candidates.png`` -- bounds of all text region candidates;
    * ``recognized.png`` -- bounds of the recognized text regions;
    * ``difference.png`` -- difference of the two pictures above;
    * ``graphical.png`` -- remaining graphical segments on a white canvas.
    """
    bounds_color = Config.instance().config(
        'argv:cbounds', (0, 0, 255)).value
    source = args[1]
    storage = kwargs['storage']['TextRecognitor']
    recognized = storage['text']
    candidates = storage['text_candidates']
    graphical_segments = storage['graphical']

    def _outline(regions, filename):
        # Draw bounds of ``regions`` on a fresh copy of the source picture,
        # save it under ``filename`` and hand the picture back.
        canvas = source.copy()
        for region in regions:
            region.display_bounds(canvas, color=bounds_color)
        canvas.save(os.path.join(dump_dir, filename))
        return canvas

    # Text region candidates, then the regions that were recognized.
    candidates_image = _outline(candidates, 'candidates.png')
    recognized_image = _outline(recognized, 'recognized.png')

    # Difference between candidates and recognized text regions.
    difference = ImageChops.difference(candidates_image, recognized_image)
    difference.save(os.path.join(dump_dir, 'difference.png'))

    # Remaining graphical segments, each drawn with its own colour.
    graphical_image = Image.create(
        source.mode, source.width, source.height,
        background=(255, 255, 255))
    for segment in graphical_segments:
        segment.display(graphical_image, color=segment.color)
    graphical_image.save(os.path.join(dump_dir, 'graphical.png'))
def _timeit(*args, **kwargs):
    """Call the wrapped ``func``, logging its run time when enabled.

    Timing is controlled by the ``argv:timeit`` configuration flag. When it
    is on, the elapsed wall-clock time is logged at debug level even if
    ``func`` raises; the exception itself still propagates.

    :return: whatever ``func(*args, **kwargs)`` returns
    """
    timing_enabled = Config.instance().config('argv:timeit', False).asbool()
    if timing_enabled:
        log.debug('timing function %s', func)
        started_at = time.time()
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = time.time() - started_at
            log.debug("done for %s. Time: %1.3f sec", func, elapsed)
    return func(*args, **kwargs)
def __init__(self, infile, outfile, options=None):
    """Initialize the application with its input and output paths.

    When ``options`` is given, the global :class:`Config` instance is set
    up from ``options.config`` together with the command line values
    collected here.

    :param infile: input file path
    :param outfile: output file path
    :param options: options instance returned by OptionParser's
        ``parse_args`` method
    :raises ValueError: if ``infile`` or ``outfile`` is missing
    """
    super(Application, self).__init__()

    if not infile:
        raise ValueError('infile: value is missing')
    if not outfile:
        raise ValueError('outfile: value is missing')
    self.infile, self.outfile = infile, outfile

    if not options:
        return

    # Command line values exposed to the rest of the code as ``argv:*``.
    argv = dict(
        infile=infile,                            # input file path
        outfile=outfile,                          # output file path
        timeit=options.timeit,                    # toggles the `timeit` decorator
        dump=options.dump,                        # toggles the `dump` decorator
        cbounds=_decode_color(options.cbounds),
    )
    Config.instance(config=options.config, argv=argv)
def load_all(cls):
    """Load all available and enabled OCR plugin classes.

    Every ``*.py`` file of this package whose name does not start with an
    underscore is imported. The plugin class is expected to be named after
    the file in CamelCase (``foo_bar.py`` -> ``FooBar``). A plugin is kept
    when ``plugins:ocr:<class>:enabled`` (default: the class attribute
    ``__ocr_enabled__``) is true.

    :return: list of plugin classes sorted by
        ``plugins:ocr:<class>:priority`` (default: the class attribute
        ``__ocr_priority__``)
    """
    config = Config.instance()

    # Locate the package through ``__file__`` instead of turning the dotted
    # module name into a path: the latter is relative to the current working
    # directory and breaks when the process is started from elsewhere.
    package_dir = os.path.dirname(os.path.abspath(__file__))

    result = []
    # ``os.listdir`` returns entries in arbitrary order; sorting them makes
    # the relative order of equal-priority plugins deterministic (the
    # priority sort below is stable).
    for entry in sorted(os.listdir(package_dir)):
        if entry.startswith('_') or not entry.endswith('.py'):
            continue
        module_name = entry[:-3]
        class_name = ''.join([n.title() for n in module_name.split('_')])
        config_prefix = "plugins:ocr:%s" % class_name
        module = __import__(
            "%s.%s" % (__name__, module_name), fromlist=[class_name])
        class_ = getattr(module, class_name)
        # NOTE(review): the Config instance is called directly here --
        # confirm that Config is callable.
        if config("%s:enabled" % config_prefix,
                  class_.__ocr_enabled__).asbool():
            result.append((class_, config_prefix))

    result.sort(
        key=lambda x: config("%s:priority" % x[1],
                             x[0].__ocr_priority__).asint())
    return [r[0] for r in result]
def load_all(cls):
    """Load all available and enabled parsing plugins.

    Every ``*.py`` file of this package whose name does not start with an
    underscore is imported. The plugin class is expected to be named after
    the file in CamelCase with a ``Parser`` suffix (``foo_bar.py`` ->
    ``FooBarParser``). A plugin is kept when
    ``plugins:parsers:<class>:enabled`` (default: the class attribute
    ``__p_enabled__``) is true; disabled plugins are not returned.

    :return: list of :class:`ParserPluginBase` subclasses sorted by
        ``plugins:parsers:<class>:priority`` (default: the class attribute
        ``__p_priority__``)
    """
    config = Config.instance()

    # Locate the package through ``__file__`` instead of turning the dotted
    # module name into a path: the latter is relative to the current working
    # directory and breaks when the process is started from elsewhere.
    package_dir = os.path.dirname(os.path.abspath(__file__))

    result = []
    # ``os.listdir`` returns entries in arbitrary order; sorting them makes
    # the relative order of equal-priority plugins deterministic (the
    # priority sort below is stable).
    for entry in sorted(os.listdir(package_dir)):
        if entry.startswith('_') or not entry.endswith('.py'):
            continue
        module_name = entry[:-3]
        class_name = "%sParser" % ''.join(
            [n.title() for n in module_name.split('_')])
        config_prefix = "plugins:parsers:%s" % class_name
        module = __import__(
            "%s.%s" % (__name__, module_name), fromlist=[class_name])
        class_ = getattr(module, class_name)
        # NOTE(review): the Config instance is called directly here --
        # confirm that Config is callable.
        if config("%s:enabled" % config_prefix,
                  class_.__p_enabled__).asbool():
            result.append((class_, config_prefix))

    result.sort(
        key=lambda x: config("%s:priority" % x[1],
                             x[0].__p_priority__).asint())
    return [r[0] for r in result]