def __init__(self, parent=None):
    """Build the extraction control panel and load its stored settings.

    The MSER extraction parameters and the "extract" filter limits are read
    from the application configuration file and copied into the widgets;
    the signal connections are made last.

    Args:
        parent: Optional parent widget, forwarded to the base class.
    """
    super().__init__(parent)
    self.ui = extract_ctrl_ui.Ui_ExtractCtrlWidget()
    self.ui.setupUi(self)
    self.setWindowIcon(QIcon(':/images/icon.png'))
    self.setAttribute(Qt.WA_QuitOnClose, False)

    ui = self.ui

    # --- MSER extraction parameters ---------------------------------------
    mser_conf = TdConfig(AppSettings.config_file_path).getExtractConfig()
    ui.spinbox_mser_delta.setValue(mser_conf[TdExtractConfigKey.DELTA])
    ui.spinbox_mser_area_high.setValue(
        mser_conf[TdExtractConfigKey.AREA_MAX])
    ui.spinbox_mser_area_low.setValue(mser_conf[TdExtractConfigKey.AREA_MIN])
    ui.dspinbox_mser_variation.setValue(
        mser_conf[TdExtractConfigKey.VARIATION])
    for source_name in mser_conf[TdExtractConfigKey.SRCS]:
        ui.listWidget.addItem(QListWidgetItem(source_name))

    # --- Region filter limits ---------------------------------------------
    filter_conf = TdConfig(
        AppSettings.config_file_path).getFilterConfig('extract')

    def load_range(key, low_widget, high_widget):
        # Each limit is stored as a (low, high) pair under a single key.
        limits = filter_conf[key]
        low_widget.setValue(limits[0])
        high_widget.setValue(limits[1])

    ui.spinbox_filter_area_lim.setValue(
        filter_conf[TdFilterConfigKey.AREA_LIM])
    load_range(TdFilterConfigKey.PERIMETER_LIM,
               ui.spinbox_filter_perimeter_low,
               ui.spinbox_filter_perimeter_high)
    load_range(TdFilterConfigKey.ASPECT_RATIO_LIM,
               ui.dspinbox_filter_aspect_ratio_low,
               ui.dspinbox_filter_aspect_ratio_high)
    ui.checkbox_filter_abs_aspect_ratio.setChecked(
        filter_conf[TdFilterConfigKey.ASPECT_RATIO_GT1])
    load_range(TdFilterConfigKey.OCCUPATION_LIM,
               ui.dspinbox_filter_occupation_low,
               ui.dspinbox_filter_occupation_high)
    load_range(TdFilterConfigKey.COMPACTNESS_LIM,
               ui.dspinbox_filter_compactness_low,
               ui.dspinbox_filter_compactness_high)
    load_range(TdFilterConfigKey.WIDTH_LIM,
               ui.spinbox_filter_width_low,
               ui.spinbox_filter_width_high)
    load_range(TdFilterConfigKey.HEIGHT_LIM,
               ui.spinbox_filter_height_low,
               ui.spinbox_filter_height_high)

    self.setEnabledForFilter(filter_conf[TdFilterConfigKey.FLAG])
    self.initConnects()
def doPreprocess(self):
    """Filter the merged text lines with the SVC and display the outcome.

    Returns:
        False when the classifier could not be initialised. Otherwise the
        tuple stored in ``self.output_data``: (every candidate text line,
        the text lines accepted by the classifier, the accepted text lines
        after overlap merging).
    """
    # Use the control panel's settings when a panel is attached, otherwise
    # fall back to the configuration file.
    if self.control_panel is not None:
        svcconf = self.control_panel.getConfiguration()
    else:
        svcconf = TdConfig(AppSettings.config_file_path).getSVCConfig()
    if not self.svc.init(svcconf.get(TdSVCConfigKey.MCONF_PATH, None)):
        return False

    # Ask the owner of this widget to provide the input data.
    logger.info("SVC require datas.")
    self.requireData.emit()
    logger.info("SVC tell data was recevied")

    # Classify every candidate region, collecting all candidates and the
    # accepted ones across the channels in a single pass.
    candidates, accepted = [], []
    for _name, bg_image, tlregions in self.input_data[0]:
        kept = []
        for region in tlregions:
            sample, _ = crop_rect(bg_image, region)
            if self.svc.predict(sample):
                kept.append(region)
        candidates.extend(tlregions)
        accepted.extend(kept)

    # Merge the accepted text lines of all channels into one set.
    overlap_merger = TdMergingOverlap()
    overlap_merger.setConfig(
        TdConfig(AppSettings.config_file_path).getMergeTLConfig())
    combined = overlap_merger.mergeTextLine(accepted)
    self.output_data = (candidates, accepted, combined)

    # Draw candidates in white and the final text lines in green.
    canvas = self.input_data[1].copy()
    canvas = TdMergingTextLine.drawRegions(canvas, (255, 255, 255),
                                           cv2.LINE_4, candidates)
    canvas = TdMergingTextLine.drawRegions(canvas, (0, 255, 0), cv2.LINE_4,
                                           combined)
    self.setDisplayCvImage(canvas)
    return self.output_data
def __init__(self, parent=None):
    """Build the SVC control panel and preview the configured model file.

    The model-configuration path is read from the application configuration
    file and shown in the line edit. When that path points to an existing
    file, its parsed YAML content is displayed as text.

    Args:
        parent: Optional parent widget, forwarded to the base class.
    """
    super().__init__(parent)
    self.ui = svc_ctrl_ui.Ui_SVCCtrlWidget()
    self.ui.setupUi(self)
    self.setWindowIcon(QIcon(':/images/icon.png'))
    self.setAttribute(Qt.WA_QuitOnClose, False)

    svc_conf = TdConfig(AppSettings.config_file_path).getSVCConfig()
    self.mconf_path = svc_conf.get(TdSVCConfigKey.MCONF_PATH, None)
    self.ui.linedit_svc_conf_path.setText(str(self.mconf_path))

    # The path is optional: Path(None) raises TypeError, so only touch the
    # file system when a path is configured. is_file() also keeps a
    # directory path from reaching open().
    if self.mconf_path is not None and Path(self.mconf_path).is_file():
        with open(self.mconf_path, 'r') as f:
            # NOTE(review): FullLoader can construct some Python objects;
            # prefer yaml.safe_load if this file may come from an untrusted
            # source.
            mconf = yaml.load(f, Loader=yaml.FullLoader)
        self.ui.label_config_context.setText(str(mconf))

    self.ui.btn_svc_conf_path.clicked.connect(self.onActionBtnClicked)
def getConfig(self):
    """Return the text-line merging configuration currently in effect.

    Returns:
        The control panel's configuration when a panel is attached,
        otherwise the merge configuration read from the configuration file.
    """
    if self.control_panel is None:
        return TdConfig(AppSettings.config_file_path).getMergeTLConfig()
    return self.control_panel.getConfiguration()
def __init__(self):
    """Reset every command-line option and create the pipeline stages."""
    # Options filled in later from the command line.
    self.image_path = self.image_name = None
    self.config_file_path = None
    self.gray_type = None
    self.show_opts = []
    self.makesample = False
    self.save_option = CliSaveOptions.NONE

    # Pipeline stages, created in the same order as before.
    self.preprocessing = TdPreprocessing()
    self.extracter = TdExtractConnectDomain()
    self.filter = TdFilter()
    self.merger = TdMergingTextLine()
    self.svc = TdSVC()
    self.ocr = TdOCR()
    self.config = TdConfig()
    self.morpher = TdMorphOperator()
def __init__(self, parent=None):
    """Build the merge control panel and load its stored settings.

    The text-line merging parameters and the "merge" filter settings are
    read from the application configuration file and copied into the
    widgets; the signal connections are made last.

    Args:
        parent: Optional parent widget, forwarded to the base class.
    """
    super().__init__(parent)
    self.ui = merge_ctrl_ui.Ui_MergeCtrlWidget()
    self.ui.setupUi(self)
    self.setWindowIcon(QIcon(':/images/icon.png'))
    self.setAttribute(Qt.WA_QuitOnClose, False)

    ui = self.ui
    key = TdMergeTLConfigKey

    # Fill in the text-line merging parameters.
    merge_conf = TdConfig(AppSettings.config_file_path).getMergeTLConfig()
    ui.linedit_combined_area_size_lim.setText(
        str(merge_conf[key.COMBINED_AREA_SIZE_LIM]))
    ui.dspinbox_overlap_ratio.setValue(merge_conf[key.OVERLAP_RATIO])
    ui.dspinbox_distance.setValue(merge_conf[key.DISTANCE])
    aspect_limits = merge_conf[key.COMBINED_ASPECT_RATIO_LIM]
    ui.dspinbox_combined_aspect_ratio_low.setValue(aspect_limits[0])
    ui.dspinbox_combined_aspect_ratio_high.setValue(aspect_limits[1])

    # The combo boxes are 0-based; the stored values are offset by one.
    ui.combobox_strategy.setCurrentIndex(merge_conf[key.STRATEGY].value - 1)
    position_ratio = merge_conf[key.POSITION_RATIO]
    ui.combobox_position_ratio.setCurrentIndex(position_ratio.value - 1)
    ui.dspinbox_position_ratio.setValue(
        merge_conf[key.POSITION_RATIO_CONSTANT])
    # The constant spin box is only shown for the CONSTANT position ratio.
    ui.dspinbox_position_ratio.setVisible(
        position_ratio is MergingPositionRatio.CONSTANT)
    ui.linedit_scope_lim.setText(str(merge_conf[key.SCOPE_LIM]))

    # Fill in the filter settings used after merging.
    fltr_conf = TdConfig(
        AppSettings.config_file_path).getFilterConfig("merge")
    area_check_enabled = (
        TdFilterCheckType.AREA in fltr_conf[TdFilterConfigKey.FLAG])
    ui.checkbox_tl_area_lim_enable.setChecked(area_check_enabled)
    ui.spinbox_tl_area_lim.setValue(fltr_conf[TdFilterConfigKey.AREA_LIM])

    self.initConnects()
def doPreprocess(self):
    """Apply the current preprocessing configuration and show the result.

    The configuration comes from the control panel when one is attached,
    otherwise from the configuration file. It is remembered in
    ``self.cur_config`` afterwards.
    """
    if self.control_panel is not None:
        active_config = self.control_panel.getConfiguration()
    else:
        active_config = TdConfig(
            AppSettings.config_file_path).getPrepConfig()

    self.preprocesser.setConfig(active_config)
    # Display the preprocessing result.
    self.showVerbose(active_config)
    self.cur_config = active_config
def onActionExtractorRequireData(self, srcs, extconf):
    """Hand the extractor widget the preprocessed images it asked for.

    Each source name has the form ``"<Channel>.<CharType>"``. Channel is one
    of Gray/Red/Green/Blue; CharType is Black, Bright or Both. A source with
    an unknown channel is logged and skipped instead of raising. A source
    with an unknown CharType contributes no image.

    Args:
        srcs: Iterable of source names to prepare.
        extconf: Extraction configuration mapping. It is copied, not
            modified: one copy per extraction direction.
    """
    # Configure the preprocessing stage from its panel, or from the
    # configuration file when no panel is attached.
    prep_widget = self.preprocess_display_widget
    if prep_widget.control_panel is None:
        config = TdConfig(AppSettings.config_file_path).getPrepConfig()
    else:
        config = prep_widget.control_panel.getConfiguration()
    preprocesser = prep_widget.preprocesser
    preprocesser.setConfig(config)

    # One configuration per extraction direction.
    posi_extconf = extconf.copy()
    nega_extconf = extconf.copy()
    posi_extconf[TdExtractConfigKey.DIRECTION] = ExtractDirection.Positive
    nega_extconf[TdExtractConfigKey.DIRECTION] = ExtractDirection.Negitive

    # Channel name -> attribute of the preprocesser holding its result.
    # Attributes are resolved lazily so only requested channels are read.
    channel_attrs = {
        "Gray": "ret_gray",
        "Red": "ret_red",
        "Green": "ret_green",
        "Blue": "ret_blue",
    }

    images = []
    for src in srcs:
        grayname, _, chartype = src.partition('.')
        attr = channel_attrs.get(grayname)
        if attr is None:
            # Previously this ended in a TypeError on None[1].
            logger.warning("Unknown extractor source %r, skipped", src)
            continue
        ret = getattr(preprocesser, attr)
        # Index 1 is used for black characters, index 2 for bright ones.
        if chartype in ("Both", "Black"):
            images.append({
                'name': "%s.Black" % grayname,
                'image': ret[1],
                'conf': posi_extconf
            })
        if chartype in ("Both", "Bright"):
            images.append({
                'name': "%s.Bright" % grayname,
                'image': ret[2],
                'conf': nega_extconf
            })

    self.extract_display_widget.input_images = images
    msg = "Data is fed for extractor"
    logger.info(msg)
def __init__(self, parent=None):
    """Build the preprocessing control panel and load its stored settings.

    Args:
        parent: Optional parent widget, forwarded to the base class.
    """
    super().__init__(parent)
    self.ui = prep_ctrl_ui.Ui_PrepCtrlWidget()
    self.ui.setupUi(self)
    self.setWindowIcon(QIcon(':/images/icon.png'))
    self.setAttribute(Qt.WA_QuitOnClose, False)

    ui = self.ui
    prep_conf = TdConfig(AppSettings.config_file_path).getPrepConfig()

    ui.linedit_total_pixels.setText(
        str(prep_conf[TdPrepConfigKeys.TOTAL_PIXELS]))

    # The bilateral setting holds three values, one per spin box.
    bilateral_boxes = (
        ui.spinbox_bilateral_arg1,
        ui.spinbox_bilateral_arg2,
        ui.spinbox_bilateral_arg3,
    )
    for position, box in enumerate(bilateral_boxes):
        box.setValue(prep_conf[TdPrepConfigKeys.BILATERAL][position])

    ui.spinbox_gaussian_size.setValue(prep_conf[TdPrepConfigKeys.GAUSS_SIZE])
    ui.dspinbox_offset.setValue(prep_conf[TdPrepConfigKeys.OFFSET])
def doPreprocess(self):
    """Extract connected domains, then run the morphological processing.

    The input images are requested through the ``requireData`` signal. The
    resulting ``(name, binarized image, regions)`` tuples are stored in
    ``self.output_regionlist``. Verbose windows are shown when the
    extraction configuration asks for them.
    """
    panel = self.control_panel

    # Fetch and apply the extraction and filter configuration.
    if panel is not None:
        extconf = panel.getConfiguration(flag=0)
    else:
        extconf = TdConfig(AppSettings.config_file_path).getExtractConfig()
    if panel is not None:
        fltconf = panel.getConfiguration(flag=1)
    else:
        fltconf = TdConfig(
            AppSettings.config_file_path).getFilterConfig("extract")
    self.filter.setConfig(fltconf)
    self.extracter.setConfig(extconf)

    # Request the input data.
    sources = extconf[TdExtractConfigKey.SRCS]
    logger.info("Extractor require datas. Srcs:%s." % sources)
    self.requireData.emit(extconf[TdExtractConfigKey.SRCS], extconf)
    logger.info("Extractor tell data was recevied")

    # Extract the connected domains of every input image.
    verbose_on = bool(extconf[TdExtractConfigKey.VERBOSE])
    self.extracter.debug.setEnable(verbose_on)
    self.filter.debug.setEnable(verbose_on)
    extracted = []
    debug_data = []
    for image in self.input_images:
        labels = self.extracter.extract_with_labels_for_images([image],
                                                               self.filter)
        extracted.append((image['name'], labels.copy(), image['image']))
        if verbose_on and self.extracter.debug.enable:
            debug_data.extend(self.extracter.debug.data)

    # Show the extraction debug data.
    if verbose_on and self.extracter.debug.enable:
        if self.dr_widget is None:
            self.dr_widget = VerboseDisplayWidget()
        self.dr_widget.setExtracterVerboseData(debug_data)
        self.dr_widget.show()

    # Fetch and apply the filter configuration for the morphological step.
    # NOTE(review): the panel is queried with flag=1 here as well as for the
    # extract filter above, while the file fallback reads a different
    # section ("morph" vs "extract") - confirm this is intended.
    if panel is not None:
        fltconf = panel.getConfiguration(flag=1)
    else:
        fltconf = TdConfig(
            AppSettings.config_file_path).getFilterConfig("morph")
    self.filter.setConfig(fltconf)

    # Morphological processing.
    self.output_regionlist = []
    for name, labels, gray_image in extracted:
        regions = self.morpher.morph_operation(labels, gray_image,
                                               self.filter)
        self.output_regionlist.append((name, labels, regions.copy()))

    # Show the morphological result.
    if extconf[TdExtractConfigKey.VERBOSE_MORPH]:
        verbose_data = {
            name: self.morpher.getMaskImage(labels, regions)
            for name, labels, regions in self.output_regionlist
        }
        if self.dr_widget is None:
            self.dr_widget = VerboseDisplayWidget()
        self.dr_widget.setPrepVerboseData(verbose_data)
        self.dr_widget.show()
class Cli:
    """Command-line front end of the text-detection pipeline.

    ``run`` executes the stages in order: preprocessing, connected-domain
    extraction, morphological processing, text-line merging, SVC texture
    filtering, cross-channel merging and OCR. The ``show*`` methods display
    or save intermediate results when the matching command-line option was
    given.
    """

    def __init__(self):
        """Reset every command-line option and create the pipeline stages."""
        # Options filled in by parseArgs().
        self.image_path = None
        self.image_name = None
        self.config_file_path = None
        self.gray_type = None
        self.show_opts = []
        self.makesample = False
        self.save_option = CliSaveOptions.NONE

        # Pipeline stages.
        self.preprocessing = TdPreprocessing()
        self.extracter = TdExtractConnectDomain()
        self.filter = TdFilter()
        self.merger = TdMergingTextLine()
        self.svc = TdSVC()
        self.ocr = TdOCR()
        self.config = TdConfig()
        self.morpher = TdMorphOperator()

    def run(self):
        """Parse the command line and run the whole pipeline once.

        Returns:
            True when the pipeline completed, False when the input image
            could not be read.
        """
        self.parseArgs()
        logger.info("CLI RUN, Image:%s", self.image_path)

        self.config.loadConfigFromFile(self.config_file_path)

        # cv2.imread returns None for an unreadable file; stop here with a
        # clear message instead of failing inside cvtColor.
        input_image = cv2.imread(self.image_path) if self.image_path else None
        if input_image is None:
            logger.error("Cannot read input image: %s", self.image_path)
            return False
        rgb_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)

        self.prepFunc(rgb_image)
        binarizes = self.extractFunc()
        regionlist = self.morphFunc(binarizes)
        tlregionlist = self.mergingFunc(regionlist)
        self.makeSVCSample(tlregionlist)

        # Texture filtering is optional: without a model path every merged
        # text line is kept (previously final_tl was left unbound).
        mconf_path = self.config.getSVCConfig().get(TdSVCConfigKey.MCONF_PATH,
                                                    None)
        if mconf_path is not None:
            final_tl = self.svcFunc(tlregionlist, mconf_path)
        else:
            final_tl = self._keepAll(tlregionlist)

        texts_ret = self.finalRet("Image", final_tl)
        self.ocrFunc(texts_ret)
        return True

    def parseArgs(self):
        """Read the options from ``sys.argv`` into the instance attributes.

        Exits with status 1 on a malformed command line, and after printing
        the usage text for any option without a dedicated handler (--help).
        """
        import os  # local import: the file-level imports are not visible here

        try:
            opts, _ = getopt.getopt(sys.argv[1:], "i:", [
                "show=", "config=", "gray=", "makesample", "save=", "debug=",
                "help"
            ])
        except getopt.GetoptError:
            print("argv error")
            sys.exit(1)

        show_options = {
            "prep": CliShowOptions.SHOW_PREP,
            "extract": CliShowOptions.SHOW_EXTRACT,
            "morph": CliShowOptions.SHOW_MORPH,
            "merge": CliShowOptions.SHOW_MERGE,
            "textline": CliShowOptions.SHOW_SVC,
            "final": CliShowOptions.SHOW_RESULT
        }
        gray_options = {
            "gray": CliGrayType.GRAY,
            "blue": CliGrayType.BLUE,
            "green": CliGrayType.GREEN,
            "red": CliGrayType.RED
        }
        save_options = {
            "merge": CliSaveOptions.MERGE,
            "svc": CliSaveOptions.SVC
        }

        # Options are compared with ==; the former substring tests
        # (cmd in "--config") only worked by accident.
        for cmd, arg in opts:
            if cmd == "-i":
                self.image_path = arg
                # File name without directory and extension, whatever the
                # extension length.
                self.image_name = os.path.splitext(os.path.basename(arg))[0]
            elif cmd == "--config":
                self.config_file_path = arg
            elif cmd == "--show":
                self.show_opts = [
                    show_options.get(i, CliShowOptions.SHOW_NONE)
                    for i in arg.split(',')
                ]
            elif cmd == "--gray":
                self.gray_type = [
                    gray_options.get(i, CliGrayType.GRAY)
                    for i in arg.split(',')
                ]
            elif cmd == "--debug":
                if 'prep' in arg.split(','):
                    self.preprocessing.debug.setEnable(True)
            elif cmd == "--makesample":
                self.makesample = True
            elif cmd == "--save":
                # Unknown values mean "save nothing", matching the default
                # set in __init__.
                self.save_option = save_options.get(arg, CliSaveOptions.NONE)
            else:
                usage()
                sys.exit(1)

    @staticmethod
    def _gridShape(total):
        """Return (rows, cols) of a near-square grid with >= total cells."""
        cols = 1
        while cols * cols < total:
            cols += 1
        rows = max(1, -(-total // cols))  # ceiling division
        return rows, cols

    @staticmethod
    def _keepAll(tlregionlist):
        """Build SVC-style (name, candidates, accepted) entries, all accepted."""
        return [(item[0], item[-1], item[-1]) for item in tlregionlist]

    def _configuredFilter(self, kind):
        """Load the *kind* filter configuration and return the usable filter."""
        configured = self.filter.setConfig(self.config.getFilterConfig(kind))
        # setConfig may or may not return the filter; fall back to the
        # filter object itself when it returns nothing.
        return self.filter if configured is None else configured

    def _plotGrid(self, named_images):
        """Plot (title, image, ...) entries on one grid and show the figure."""
        rows, cols = self._gridShape(len(named_images))
        for index, entry in enumerate(named_images, start=1):
            plt.subplot(rows, cols, index)
            plt.title(entry[0])
            plt.imshow(entry[1], "gray")
        plt.show()

    def _presentOverlay(self, rgb_image, name, show, save):
        """Display and/or save one annotated RGB image."""
        if show:
            plt.imshow(rgb_image)
            plt.show()
        if save:
            bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
            cv2.imwrite(
                "data/save/" + self.image_name + "-" + name + ".jpg",
                bgr_image)

    def showPrep(self):
        """Show the preprocessing result of every selected gray channel.

        Without ``--gray`` the gray channel is shown; previously the method
        failed on iterating ``None`` in that case.
        """
        if CliShowOptions.SHOW_PREP not in self.show_opts:
            return

        channels = (
            (CliGrayType.GRAY, "Gray", "ret_gray"),
            (CliGrayType.BLUE, "Blue Channel", "ret_blue"),
            (CliGrayType.GREEN, "Green Channel", "ret_green"),
            (CliGrayType.RED, "Red Channel", "ret_red"),
        )
        for gray in self.gray_type or [CliGrayType.GRAY]:
            match = next((entry for entry in channels if entry[0] is gray),
                         None)
            if match is None:
                continue
            _, gray_name, attr = match
            ret = getattr(self.preprocessing, attr)

            if not self.preprocessing.debug.enable:
                plt.subplot(131)
                plt.title("Black chars")
                plt.imshow(ret[1], "gray")
                plt.subplot(132)
                plt.imshow(ret[0], "gray")
                plt.title(gray_name)
                plt.subplot(133)
                plt.title("Bright chars")
                plt.imshow(ret[2], "gray")
            else:
                data = self.preprocessing.debug.data
                rows, cols = self._gridShape(len(data))
                for index, entry in enumerate(data, start=1):
                    plt.subplot(rows, cols, index)
                    plt.title(entry[0])
                    plt.imshow(entry[1], "gray")
            plt.show()

    def showExtract(self, binarizes):
        """Show the connected-domain extraction results on one grid."""
        if CliShowOptions.SHOW_EXTRACT not in self.show_opts:
            return
        self._plotGrid(binarizes)

    def showMorph(self, regionlist):
        """Show the mask images produced by the morphological processing."""
        if CliShowOptions.SHOW_MORPH not in self.show_opts:
            return
        masks = [(regiondata[0],
                  self.morpher.getMaskImage(regiondata[1], regiondata[2]))
                 for regiondata in regionlist]
        # Plot directly; show_opts is no longer modified to force display.
        self._plotGrid(masks)

    def showMerging(self, tlregionlist):
        """Show and/or save the merged text lines of every channel."""
        show = CliShowOptions.SHOW_MERGE in self.show_opts
        save = self.save_option is CliSaveOptions.MERGE
        if not (show or save):
            return
        for item in tlregionlist:
            rgb_image = self.preprocessing.rgb_image.copy()
            rgb_image = TdMergingTextLine.drawRegions(rgb_image,
                                                      (255, 255, 255),
                                                      cv2.LINE_4, item[-1])
            self._presentOverlay(rgb_image, item[0], show, save)

    def showSVC(self, final_tl):
        """Show and/or save candidates (white) and accepted lines (green)."""
        show = CliShowOptions.SHOW_SVC in self.show_opts
        save = self.save_option is CliSaveOptions.SVC
        if not (show or save):
            return
        for item in final_tl:
            rgb_image = self.preprocessing.rgb_image.copy()
            rgb_image = TdMergingTextLine.drawRegions(rgb_image,
                                                      (255, 255, 255),
                                                      cv2.LINE_4, item[-2])
            rgb_image = TdMergingTextLine.drawRegions(rgb_image, (0, 255, 0),
                                                      cv2.LINE_4, item[-1])
            self._presentOverlay(rgb_image, item[0], show, save)

    def showFinal(self, tlsin1):
        """Show the final text lines drawn on the original image."""
        if CliShowOptions.SHOW_RESULT not in self.show_opts:
            return
        # Scale the regions by the preprocessing zoom level before drawing
        # them on the original image.
        zoom_level = self.preprocessing.zoom_level
        tls = [np.int64(i * zoom_level) for i in tlsin1]
        origin_image = self.preprocessing.origin_image.copy()
        origin_image = TdMergingTextLine.drawRegions(origin_image,
                                                     (255, 255, 255),
                                                     cv2.LINE_4, tls)
        plt.imshow(origin_image)
        plt.show()

    def prepFunc(self, rgb_image):
        """Configure the preprocessing stage and feed it the RGB image."""
        self.preprocessing.setConfig(self.config.getPrepConfig())
        self.preprocessing.rgb_image = rgb_image
        self.showPrep()

    def extractFunc(self):
        """Extract connected domains from every configured source.

        Returns:
            A list of ``(source name, extraction result, input image)``
            tuples, one per extracted character polarity.
        """
        binarizes = []
        config = self.config.getExtractConfig()
        positive_conf = config.copy()
        positive_conf[TdExtractConfigKey.DIRECTION] = ExtractDirection.Positive
        negative_conf = config.copy()
        negative_conf[TdExtractConfigKey.DIRECTION] = ExtractDirection.Negitive
        ext_flt = self._configuredFilter("extract")

        channel_attrs = {
            "Gray": "ret_gray",
            "Red": "ret_red",
            "Green": "ret_green",
            "Blue": "ret_blue",
        }
        # (accepted character types, index into the channel result, config)
        variants = (
            (("Black", "Both"), 1, positive_conf),
            (("Bright", "Both"), 2, negative_conf),
        )
        for src in config[TdExtractConfigKey.SRCS]:
            grayname, chartype = src.split('.')
            attr = channel_attrs.get(grayname)
            if attr is None:
                # Report the problem and exit with a failure status; this
                # used to exit silently with status 0.
                logger.error("Unknown extraction source: %s", src)
                sys.exit(1)
            gray = getattr(self.preprocessing, attr)
            # NOTE(review): a "<Channel>.Both" source yields two entries
            # that share the name `src`, so files saved per name in
            # showMerging/showSVC overwrite each other - confirm intended.
            for accepted, index, conf in variants:
                if chartype not in accepted:
                    continue
                image = [{
                    'name': grayname,
                    'image': gray[index],
                    'conf': conf
                }]
                binarized = self.extracter.extract_with_labels_for_images(
                    image, ext_flt)
                binarizes.append((src, binarized, gray[index]))

        self.showExtract(binarizes)
        return binarizes

    def morphFunc(self, binarizes):
        """Run the morphological processing on every extraction result.

        Returns:
            A list of ``(source name, extraction result, regions)`` tuples.
        """
        regionlist = []
        mph_flt = self._configuredFilter("morph")
        for binarized in binarizes:
            regions = self.morpher.morph_operation(binarized[1], binarized[2],
                                                   mph_flt)
            regionlist.append((binarized[0], binarized[1], regions.copy()))
        self.showMorph(regionlist)
        return regionlist

    def mergingFunc(self, regionlist):
        """Merge the regions of every source into text lines.

        Returns:
            A list of ``(source name, extraction result, text lines)``.
        """
        tlregionlist = []
        self.merger.setConfig(self.config.getMergeTLConfig())
        for name, binarized, regions in regionlist:
            self.merger.printParams(name)
            tl_regions = self.merger.mergeTextLine(regions)
            tlregionlist.append((name, binarized, tl_regions))
        self.showMerging(tlregionlist)
        return tlregionlist

    def svcFunc(self, tlregionlist, mconf_path):
        """Keep only the text lines the SVC classifier accepts.

        When the classifier cannot be initialised, a warning is logged and
        every text line is kept.

        Returns:
            A list of ``(source name, candidates, accepted)`` tuples.
        """
        if not self.svc.init(mconf_path):
            logger.warning("SVC init failed (%s), text lines are unfiltered",
                           mconf_path)
            final_tl = self._keepAll(tlregionlist)
        else:
            final_tl = []
            for item in tlregionlist:
                bg_image = self.preprocessing.getRet(item[0])[-1]
                accepted = []
                for region in item[-1]:
                    sample, _ = crop_rect(bg_image, region)
                    if self.svc.predict(sample):
                        accepted.append(region)
                final_tl.append((item[0], item[-1], accepted))
        self.showSVC(final_tl)
        return final_tl

    def finalRet(self, name, final_tl):
        """Merge the accepted text lines of all channels into one list.

        Returns:
            The tuple ``(name, merged text lines)``.
        """
        tls = []
        for item in final_tl:
            tls.extend(item[-1])
        merger = TdMergingOverlap()
        merger.setConfig(self.config.getMergeTLConfig())
        tlsin1 = merger.mergeTextLine(tls)
        self.showFinal(tlsin1)
        return (name, tlsin1)

    def ocrFunc(self, tls):
        """Run OCR on the final text lines and print the recognised text.

        Args:
            tls: The ``(name, text lines)`` tuple returned by finalRet.

        Returns:
            Whatever the OCR stage returned for the text lines.
        """
        zoom_level = self.preprocessing.zoom_level
        tls = [np.int64(i * zoom_level) for i in tls[1]]
        origin_image = self.preprocessing.origin_image.copy()
        texts = self.ocr.ocr(tls, origin_image, self.image_name)
        print("%s %s" % (self.image_name, texts))
        return texts

    def makeSVCSample(self, tlregionlist):
        """Interactively label text-line crops as classifier samples.

        Does nothing unless ``--makesample`` was given. Otherwise every
        crop is displayed, the user is asked whether it is text, the crop
        is written under ``data/Model/`` and the program exits.
        """
        if not self.makesample:
            return
        plt.ion()
        i = 0
        for item in tlregionlist:
            bg_image = self.preprocessing.getRet(item[0])[-1]
            for region in item[-1]:
                sample, _ = crop_rect(bg_image, region)
                plt.imshow(sample, "gray")
                plt.pause(0.2)
                judge = input("is text region? : ")
                # Anything but an exact 'Y' counts as "not text".
                judge = 'Y' if judge == 'Y' else 'N'
                cv2.imwrite(
                    "data/Model/" + self.image_name + str(i) + "-" + judge +
                    ".jpg", sample)
                i += 1
        plt.ioff()
        sys.exit(0)