def aaa(request):
    """Django view: GET renders the upload page; POST OCRs an uploaded PNG.

    Expects a multipart upload under the ``file`` key.  Returns JSON
    ``{'code': 200, 'msg': <recognized text>}`` on success, ``code`` 400
    otherwise.
    """
    if request.method == 'GET':
        return render(request, 'aaa.html')
    upload = request.FILES.get('file')
    if upload is None:
        # Robustness fix: the original crashed with AttributeError when no
        # file part was supplied.
        return JsonResponse({'code': 400, 'msg': '图片名含有非转义字符,建议使用数字字母的形式'})
    raw = b''.join(upload.chunks())
    # Bug fix: CnOcr() was wrapped in `try: ... except: pass`, which left
    # `ocr` undefined (NameError) when construction failed.  Let a real
    # construction failure propagate instead of being silently swallowed.
    ocr = CnOcr()
    img_data = plt.imread(BytesIO(raw), "png")
    # Keep RGB only; drop an alpha channel if present.
    image = img_data[:, :, :3]
    # plt.imread yields floats in [0, 1]; cnocr wants 8-bit pixels.
    # Bug fix: np.int was removed in NumPy 1.24 -- uint8 matches 0..255.
    image = (image * 255).astype(np.uint8)
    try:
        res = ocr.ocr_for_single_line(image)
        # Equivalent to the original join-with-comma-then-strip-commas:
        # concatenate the tokens and drop any commas inside them.
        msg = ''.join(res).replace(',', '')
        return JsonResponse({'code': 200, 'msg': msg})
    except Exception:
        msg = '图片名含有非转义字符,建议使用数字字母的形式'
        return JsonResponse({'code': 400, 'msg': msg})
def get_list(self):
    """Detect text boxes in ``self.filepath``, OCR each box, draw the boxes
    onto the image for visual inspection, and return the raw detection list."""
    detector = CnStd()
    recognizer = CnOcr()
    recognizer._model_name = 'conv-lite-fc'
    print(recognizer._model_name)
    full_page_result = recognizer.ocr(img_fp=self.filepath)
    detections = detector.detect(self.filepath, pse_threshold=0.7,
                                 pse_min_area=150, context='gpu',
                                 height_border=0.10)
    canvas = Image.open(self.filepath)
    font = ImageFont.truetype('./simhei.ttf', 13, encoding='utf-8')
    painter = ImageDraw.Draw(canvas)
    for detection in detections:
        print('a')
        print('a')
        print(detection)
        quad = detection['box']
        line_result = recognizer.ocr_for_single_line(detection['cropped_img'])
        print('result: %s' % ''.join(str(line_result)))
        # Corner points of the detected quadrilateral, in box order.
        p1, p2, p3, p4 = [(quad[k, 0], quad[k, 1]) for k in range(4)]
        painter.polygon([p1, p4, p3, p2], outline=(255, 0, 0))
        painter.text(p4, str(line_result), (200, 0, 0), font=font)
    canvas.show()
    print(full_page_result)
    return detections
def main():
    """CLI entry point: OCR an image with a configurable cnocr model.

    ``-s/--single-line`` treats the image as one line of text; otherwise the
    whole image is segmented and recognized line by line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", help="model name", type=str,
                        default='conv-lite-fc')
    parser.add_argument("--model_epoch", type=int, default=None,
                        help="model epoch")
    parser.add_argument("-f", "--file", help="Path to the image file")
    parser.add_argument(
        "-s", "--single-line",
        # Bug fix: without an action, `-s` consumed a value instead of acting
        # as a boolean flag (store_true defaults to False).
        action="store_true",
        help="Whether the image only includes one-line characters",
    )
    args = parser.parse_args()
    ocr = CnOcr(model_name=args.model_name, model_epoch=args.model_epoch)
    if args.single_line:
        res = ocr.ocr_for_single_line(args.file)
    else:
        res = ocr.ocr(args.file)
    logger.info("Predicted Chars: %s", res)
def main():
    """Recognize the characters in a CAPTCHA image named on the command line."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-f", "--file", help="Path to the CAPTCHA image file")
    options = arg_parser.parse_args()
    result = CnOcr().ocr_for_single_line(options.file)
    print("Predicted Chars:", result)
async def Img2Text(img: Image, ocr_model: CnOcr, std: CnStd) -> str:
    """Download the image behind ``img.url``, detect its text boxes with
    CnStd, OCR every box with CnOcr, and return the flattened text."""
    async with aiohttp.ClientSession() as session:
        async with session.get(url=img.url) as resp:
            payload = await resp.read()
    pixels = numpy.array(IMG.open(BytesIO(payload)).convert("RGB"))
    recognized = []
    for detection in std.detect(pixels):
        # 'cropped_img' holds the detected text box.
        line = ocr_model.ocr_for_single_line(detection['cropped_img'])
        recognized.append([line])
    print(recognized)
    return "".join(await flat(recognized))
def main():
    """CLI entry point: OCR an image, either whole-page or as a single line."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--file", help="Path to the image file")
    parser.add_argument(
        "-s", "--single-line",
        # Bug fix: without an action, `-s` consumed a value instead of acting
        # as a boolean flag (store_true defaults to False).
        action="store_true",
        help="Whether the image only includes one-line characters")
    args = parser.parse_args()
    ocr = CnOcr()
    if args.single_line:
        res = ocr.ocr_for_single_line(args.file)
    else:
        res = ocr.ocr(args.file)
    print("Predicted Chars:", res)
def orc_text(path_image):
    """Split a grayscale image into text lines via projection profiles and
    OCR each line; return the list of recognized line strings."""
    recognizer = CnOcr()
    gray = cv2.imread(path_image, cv2.IMREAD_GRAYSCALE)
    img_height, img_width = gray.shape
    _, inverted = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)
    lines = get_text_lines(get_horizontal_projection(inverted))
    recognized = []
    for segment in lines:
        top, bottom = segment[0], segment[1]
        line_img = inverted[top:bottom, 0:img_width]
        words = get_text_word(get_vertical_projection(line_img))
        # Crop from the first word's left edge to the last word's right edge.
        word_span = gray[top:bottom, words[0][0]:words[-1][1]]
        recognized.append(''.join(recognizer.ocr_for_single_line(word_span)))
    return recognized
def extract_content(save_dir_path):
    """OCR every picture under ./received and write the recognized text into
    an Excel workbook (one sheet per picture, cells placed by box position)."""
    detector = CnStd()
    recognizer = CnOcr()
    base_path = os.path.abspath(os.path.dirname(__file__))
    received_dir = os.path.join(base_path, "received")
    picture_paths = []
    for pattern in ("*.png", "*.jpg", "*.jpeg"):
        picture_paths += glob.glob(os.path.join(received_dir, pattern))
    workbook = xlwt.Workbook()
    for sheet_no, picture in enumerate(picture_paths):
        sheet = workbook.add_sheet('sheet{}'.format(sheet_no),
                                   cell_overwrite_ok=True)
        for detection in detector.detect(picture):
            xs, ys = zip(*detection['box'])
            top, bottom, left, right = min(ys), max(ys), min(xs), max(xs)
            # Map pixel extents to spreadsheet rows/columns (80px per row,
            # 60px per column).
            top_row = int(top // 80)
            bottom_row = int(bottom // 80)
            left_column = int(left // 60)
            right_column = int(right // 60)
            text = ''.join(
                recognizer.ocr_for_single_line(detection['cropped_img']))
            try:
                logger.info(
                    "top_row:{}, bottom_row:{}, left_column:{}, right_column:{}, ocr_res:{}",
                    top_row, bottom_row, left_column, right_column, text,
                    feature="f-strings")
                sheet.write_merge(top_row, bottom_row, left_column,
                                  right_column, text)
            except Exception as e:
                print(e)
    workbook.save(os.path.join(base_path, save_dir_path, "res.xls"))
def ocr():
    """Flask endpoint: accept an uploaded image, detect its text boxes, OCR
    each box, and return the recognized lines as JSON.

    Returns HTTP 400 with ``code=-1`` when no file part is present.
    """
    import os  # local import keeps this snippet self-contained
    if 'file' not in request.files:
        return jsonify(code=-1, message='no file error'), 400
    file = request.files['file']
    _uuid = str(uuid.uuid1())
    upload_dir = '/tmp/ocr'
    # Bug fix: file.save() failed with FileNotFoundError on a fresh host
    # because /tmp/ocr was never created.
    os.makedirs(upload_dir, exist_ok=True)
    file_name = os.path.join(upload_dir, _uuid)
    file.save(file_name)
    ocr = CnOcr(name=_uuid)
    std = CnStd(name=_uuid)
    lines = []
    for box_info in std.detect(file_name):
        cropped_img = box_info['cropped_img']  # detected text box
        lines.append(''.join(ocr.ocr_for_single_line(cropped_img)))
    return jsonify(code=0, message='ok', data=lines)
def to_raw_srt(path, srt_dir):
    """Walk the numbered frame directories under *path*, OCR every cropped
    image, and append one tab-separated record per image to
    ``<srt_dir>/to_srt.txt`` (frame number, text position, recognized text).
    """
    ocr = CnOcr()
    # Robustness: makedirs tolerates an existing dir and creates parents.
    os.makedirs(srt_dir, exist_ok=True)
    frame_dirs = sorted(int(r) for r in os.listdir(path) if r.isdigit())
    out_file = srt_dir + '/to_srt' + '.txt'
    if os.path.exists(out_file):
        os.remove(out_file)
    # Fix: open the output once instead of re-opening (and redundantly
    # re-closing -- the original called f.close() inside `with`) per image.
    with open(out_file, 'a+') as f:
        for i in frame_dirs:
            child_dir = path + '/' + str(i)
            for j in os.listdir(child_dir):
                frame_no = str(child_dir.split('/')[-1])
                text_position = j.split('.')[0]
                # OCR the cropped image, then clean the raw text.
                content = cleantxt(
                    "".join(ocr.ocr_for_single_line(child_dir + '/' + j)))
                f.write(frame_no + '\t' + text_position + '\t' + content +
                        '\r\n')
def target_function(idx, ls, prefix):
    """Worker: OCR every image file in *ls* (paths relative to *prefix*) and
    dump ``{file_name: recognized_chinese_text}`` to ``./output_<idx>.json``.

    Originally each worker pinned its own GPU; for single-GPU submission
    every worker uses device 0.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    std = CnStd(context='gpu')
    cn_ocr = CnOcr(context='gpu')
    result = dict()
    for file_name in tqdm(ls):
        file_path = os.path.join(prefix, file_name)
        box_info_list = std.detect(file_path)
        output = ''
        for box_info in box_info_list:
            cropped_img = box_info['cropped_img']  # detected text box
            # Idiom fix: was `type(cropped_img) != type(None)`.
            if cropped_img is not None:
                output += ''.join(cn_ocr.ocr_for_single_line(cropped_img))
        output = output.replace(' ', '')
        # Keep CJK Unified Ideographs only (raw string avoids escape issues).
        output = re.sub(r"[^\u4e00-\u9fa5]", "", output)
        result[file_name] = output
    with open('./output_%d.json' % (idx), 'w', encoding='utf-8') as w:
        w.write(json.dumps(result, ensure_ascii=False, indent=2))
# Demo: detect text boxes with CnStd on one picture, run full-page OCR on
# another, then OCR every detected box individually.
from cnocr import CnOcr
import cnocr
from cnstd import CnStd

detector = CnStd()
recognizer = CnOcr()
detections = detector.detect('E:\\Work Place\\pocr\\pic\\2.png')
res = recognizer.ocr('E:\\Work Place\\pocr\\pic\\1.png')
for detection in detections:
    line = recognizer.ocr_for_single_line(detection['cropped_img'])
    print('result: %s' % ''.join(line))
class GUIMainWin(QMainWindow, Ui_ArknightsRecruimentHelperGUI):
    """Main window: captures an emulator window, OCRs the five recruitment
    tag slots, and lists the single/double-tag strategies matching them."""

    def __init__(self, parent=None):
        super(GUIMainWin, self).__init__(parent)
        # Basic Setups
        self.setupUi(self)
        self.appExceptionHandler = ExceptHookHandler(
            self,
            logFile=os.path.join(os.path.expanduser('~'),
                                 "ArknightsRecruitmentHelper", "log.txt"))
        self.setWindowTitle(globalVersion)
        # Key Initializations
        self.handle = 0  # 0 means "no emulator window selected yet"
        self.initializeHandleSelector()
        # One QGraphicsScene per recruitment-tag slot (five slots total).
        self.tagImageSceneList = [
            QGraphicsScene(),
            QGraphicsScene(),
            QGraphicsScene(),
            QGraphicsScene(),
            QGraphicsScene()
        ]
        self.tagTypeList = ["", "", "", "", ""]  # OCR result per slot
        # Handler Connections
        self.handleSelectorComboBox.activated[str].connect(self.onHandleSelect)
        self.updateHandleSelectorListButton.clicked.connect(
            self.initializeHandleSelector)
        self.manualRecognizeAndAnalyzeButton.clicked.connect(
            self.recognizeAndAnalyze)
        self.updateDataButton.clicked.connect(self.updateData)
        self.ocrInstance = CnOcr(model_name='conv-lite-fc')

    def initializeHandleSelector(self):
        """Enumerate candidate emulator windows and (re)fill the handle
        selector combo box; auto-select when exactly one candidate exists."""
        emulator_lst = dict()  # hwnd -> owning top-level window title
        emulator_hwnd = ["subWin", "canvasWin", "BlueStacksApp"
                         ]  # subWin: nox, ldplayer | canvasWin: mumu

        def check_emulator_window(hwnd, p):
            # Recurse into child windows until a known emulator class is found.
            if win32gui.GetClassName(
                    hwnd) in emulator_hwnd and hwnd not in emulator_lst:
                emulator_lst.update({hwnd: p})
            else:
                win32gui.EnumChildWindows(hwnd, check_emulator_window, p)

        def gui_get_all_hwnd(hwnd, mouse):
            if win32gui.IsWindow(hwnd) and win32gui.IsWindowEnabled(
                    hwnd) and win32gui.IsWindowVisible(hwnd):
                if win32gui.GetClassName(
                        hwnd) == "UnityWndClass" and win32gui.GetWindowText(
                            hwnd
                        ) == "PrincessConnectReDive":  # DMM Game Player
                    print("yo!")
                else:
                    win32gui.EnumChildWindows(hwnd, check_emulator_window,
                                              win32gui.GetWindowText(hwnd))

        win32gui.EnumWindows(gui_get_all_hwnd, 0)
        self.handleList = []
        for h, t in emulator_lst.items():
            # NOTE(review): `is not ""` is an identity comparison, not
            # equality, and raises a SyntaxWarning on modern Python --
            # this should be `t != ""`.
            if t is not "":
                self.handleList.append([h, t])
        self.titleList = [handle[1] for handle in self.handleList]
        self.handleSelectorComboBox.clear()
        self.handleSelectorComboBox.addItems(self.titleList)
        if len(self.titleList) == 1:
            # Exactly one candidate: select it automatically.
            self.handle = list(emulator_lst.keys())[0]
            self.queryStatusTag.setText('等待查询')
            self.queryStatusTag.setStyleSheet("color:green")
        else:
            self.queryStatusTag.setText("请选择句柄")
            self.queryStatusTag.setStyleSheet("color:red")

    def onHandleSelect(self, handleTitle):
        """Resolve the selected combo-box title back to its window handle."""
        def getHandle(handleTitle):
            for handle in self.handleList:
                if handle[1] == handleTitle:
                    return handle

        targetHandle = getHandle(handleTitle)
        self.handle = targetHandle[0]
        self.queryStatusTag.setText("等待查询")
        self.queryStatusTag.setStyleSheet("color:green")

    def recognizeAndAnalyze(self, slotNum: [0, 1, 2, 3]):
        """Grab the game window, OCR the five tag slots, and display the
        single/double-tag recruitment strategies matching the parsed tags."""
        strategyListDict = util.config_loadStrategyListDict()
        self.tagOneLabel.setText("识别中...")
        self.tagTwoLabel.setText("识别中...")
        self.tagThreeLabel.setText("识别中...")
        self.tagFourLabel.setText("识别中...")
        self.tagFiveLabel.setText("识别中...")
        if self.handle == 0:
            QMessageBox.information(self, "No Handle", "No Handle")
            self.queryStatusTag.setText("请选择句柄")
            self.queryStatusTag.setStyleSheet("color:red")
            return
        gameImage = screen.grabWindow(
            self.handle).toImage()  # capture the vbox child window / DMM UnityWnd directly
        translatedCharH = gameImage.height(
        ) * config_dict['tagImageParams']['heightRatio']
        translatedCharW = gameImage.width(
        ) * config_dict['tagImageParams']['widthRatio']
        # Crop each tag slot by its configured position ratio, scaled to
        # 100px width for display and OCR.
        self.tagImageList = [
            gameImage.copy(gameImage.width() * ratioCordinates['x'],
                           gameImage.height() * ratioCordinates['y'],
                           translatedCharW,
                           translatedCharH).scaledToWidth(100)
            for ratioCordinates in config_dict['tagImageParams']
            ['tagLocationRatio']
        ]
        self.tagImagePixList = [
            QtGui.QPixmap.fromImage(tagImage) for tagImage in self.tagImageList
        ]
        self.tagImageItemList = [
            QGraphicsPixmapItem(tagImagePix)
            for tagImagePix in self.tagImagePixList
        ]
        for i in range(len(self.tagImageSceneList)):
            self.tagImageSceneList[i].addItem(self.tagImageItemList[i])
        self.tagImageOneGraphicsView.setScene(self.tagImageSceneList[0])
        self.tagImageTwoGraphicsView.setScene(self.tagImageSceneList[1])
        self.tagImageThreeGraphicsView.setScene(self.tagImageSceneList[2])
        self.tagImageFourGraphicsView.setScene(self.tagImageSceneList[3])
        self.tagImageFiveGraphicsView.setScene(self.tagImageSceneList[4])
        try:
            for i in range(5):
                # QImage -> numpy array so cnocr can consume it.
                rawImage = self.tagImageList[i].convertToFormat(4)
                w = rawImage.width()
                h = rawImage.height()
                ptr = rawImage.bits()
                ptr.setsize(rawImage.byteCount())
                preparedImage = np.array(ptr).reshape(h, w, 4)  # conversion happens here
                preparedImage = cv2.cvtColor(preparedImage, cv2.COLOR_BGRA2BGR)
                tagType = "".join(
                    self.ocrInstance.ocr_for_single_line(preparedImage))
                # Strip ASCII/latin-1 characters and stray punctuation from
                # the OCR output, keeping only the CJK tag text.
                tagType = re.sub('[\x00-\xff,-]', '', tagType)
                self.tagTypeList[i] = tagType
                if i == 0:
                    self.tagOneLabel.setText(tagType)
                if i == 1:
                    self.tagTwoLabel.setText(tagType)
                if i == 2:
                    self.tagThreeLabel.setText(tagType)
                if i == 3:
                    self.tagFourLabel.setText(tagType)
                if i == 4:
                    self.tagFiveLabel.setText(tagType)
                print('tagType %s' % i, tagType)
        except Exception as e:
            # NOTE(review): swallowing all OCR errors leaves stale labels;
            # consider surfacing the failure in the UI.
            print(e)
            pass
        candidateStrategyListDict = {
            'singleTagStrategyList': [],
            'doubleTagStrategyList': []
        }
        # Keep only strategies whose tag(s) all appear among the OCRed slots.
        for singleTagStrategy in strategyListDict['singleTagStrategyList']:
            if singleTagStrategy['tag'] in self.tagTypeList:
                candidateStrategyListDict['singleTagStrategyList'].append(
                    singleTagStrategy)
        for doubleTagStrategy in strategyListDict['doubleTagStrategyList']:
            if set(doubleTagStrategy['tagCombination']).issubset(
                    self.tagTypeList):
                candidateStrategyListDict['doubleTagStrategyList'].append(
                    doubleTagStrategy)
        print(candidateStrategyListDict)
        self.singleTagStrategyListTempPlainTextEdit.clear()
        self.doubleTagStrategyListTempPlainTextEdit.clear()
        if len(candidateStrategyListDict['singleTagStrategyList']) == 0:
            self.singleTagStrategyListTempPlainTextEdit.appendPlainText('无结果')
        else:
            for singleTagStrategy in candidateStrategyListDict[
                    'singleTagStrategyList']:
                self.singleTagStrategyListTempPlainTextEdit.appendPlainText(
                    "选择%s可以锁定:\n" % singleTagStrategy['tag'])
                for plausibleOperator in singleTagStrategy['plausibleList']:
                    self.singleTagStrategyListTempPlainTextEdit.appendPlainText(
                        "⭐%s %s" % (str((plausibleOperator['rarity'] + 1)),
                                    plausibleOperator['name']))
        if len(candidateStrategyListDict['doubleTagStrategyList']) == 0:
            self.doubleTagStrategyListTempPlainTextEdit.appendPlainText('无结果')
        else:
            for doubleTagStrategy in candidateStrategyListDict[
                    'doubleTagStrategyList']:
                self.doubleTagStrategyListTempPlainTextEdit.appendPlainText(
                    "选择%s + %s 可以锁定:\n" %
                    (doubleTagStrategy['tagCombination'][0],
                     doubleTagStrategy['tagCombination'][1]))
                for plausibleOperator in doubleTagStrategy['plausibleList']:
                    self.doubleTagStrategyListTempPlainTextEdit.appendPlainText(
                        "⭐%s %s" % (str((plausibleOperator['rarity'] + 1)),
                                    plausibleOperator['name']))

    def updateData(self):
        """Kick off an asynchronous download of the latest character table."""
        url = "https://github.91chifun.workers.dev//https://raw.githubusercontent.com/Kengxxiao/ArknightsGameData/master/zh_CN/gamedata/excel/character_table.json"
        newOTARunnable = OTARunnable(url)
        QThreadPool.globalInstance().start(newOTARunnable)
        # NOTE(review): possible async race -- the download above may not
        # have finished when the data is loaded below.
        dataDict = util.loadDataDict()
        strategyListDict = util.config_loadStrategyListDict()
# NOTE(review): this fragment depends on names defined before this chunk
# (`place`, `k`, `words`, `place_con`, `cfg`) -- presumably an enclosing loop
# over place-name/keyword pairs; verify against the full file.
for w in place:
    words.add(w)
place_con[place] = k
# OCR-driven auto-clicker: capture a screen region for up to 80 seconds,
# read the on-screen place name, fuzzy-match it against known places, and
# click the mapped postbox position when the match is close enough.
ocr = CnOcr(model_name='densenet-lite-gru')
d = d3dshot.create(capture_output="numpy")
time.sleep(1)
region = cfg['region']
# Convert (x, y, w, h) to (left, top, right, bottom).
region = region[0], region[1], region[0] + region[2], region[1] + region[3]
d.capture(region=region)
st = time.time()
while time.time() - st < 80:
    frame = d.get_latest_frame()
    if frame is None:
        time.sleep(0.001)
        continue
    ocr_res = ocr.ocr_for_single_line(frame)
    ocr_res = "".join(ocr_res)
    if "," in ocr_res:
        ocr_res = ocr_res.split(",")[0]
    # Keep only characters that occur in some known place name.
    fix_res = list(filter(lambda x: x in words, ocr_res))
    fix_res = "".join(fix_res)
    print(f'result: {fix_res} ocr result: {ocr_res}')
    # Pick the known place with the smallest edit distance.
    check = [(k, Levenshtein.distance(k, fix_res)) for k in place_con]
    place, dis = sorted(check, key=lambda x: x[1])[0]
    if dis <= 1:
        con = place_con[place]
        click_pos = cfg['postbox'][con]
        print(f'click on {con}')
        pyautogui.leftClick(click_pos[0], click_pos[1], interval=0.1,
                            duration=0.1)
d.stop()
# -*- coding: utf-8 -*-
# Minimal cnocr demo: recognize one line of text from an image file.
from cnocr import CnOcr  # pip install cnocr  (Chinese OCR)
import cv2

cn_ocr = CnOcr(root='cnocr_model')
# img = cv2.imread('data/tt15.png')
img = cv2.imread('data/tt16.png')
print('ocr result: %s' % ''.join(cn_ocr.ocr_for_single_line(img)))
def predict(file):
    """Extract a table from a scanned image into result_df.xlsx.

    Pipeline: binarize, isolate horizontal/vertical ruling lines with
    morphological filtering, find line intersections ("dots"), normalize
    their coordinates, cut the grid into cells, OCR each cell with cnocr,
    and write the assembled DataFrame to Excel.  Intermediate images are
    shown with matplotlib along the way.
    """
    img_gray = io.imread(file, True)
    img_gray = add_black_line(img_gray)
    img_binary = get_binary_img(img_gray)  # binarized image
    rows, cols = img_binary.shape
    scale = 25
    col_selem = morphology.rectangle(cols // scale, 1)  # erode/dilate with a (cols//scale) x 1 structuring element
    row_selem = morphology.rectangle(1, rows // scale)  # erode/dilate with a 1 x (rows//scale) structuring element
    img_cols = dil_ero(img_binary, col_selem)  # vertical-line image
    img_rows = dil_ero(img_binary, row_selem)  # horizontal-line image
    img_line = img_cols * img_rows  # wherever either is 0, take 0 -> line image
    img_dot = img_cols + img_rows  # wherever either is white, take white -> dot image
    # Show the line image.
    plt.imshow(img_line, plt.cm.gray)
    plt.show()
    # Show the dot image.
    plt.imshow(img_dot, plt.cm.gray)
    plt.show()
    # Clamp summed pixels back into {0, 1}.
    for i in range(rows):
        for j in range(cols):
            if img_dot[i, j] == 2:
                img_dot[i, j] = 1
    ''' 点团浓缩为单个像素 '''
    # Collapse each dot cluster (up to `width` pixels wide) to one pixel by
    # whitening everything below/right of a cluster's top-left pixel.
    width = 7
    idx = np.argwhere(img_dot == 0)
    idx_unirow = list(np.unique(idx[:, 0]))
    idx_unicol = list(np.unique(idx[:, 1]))
    dot_rows = len(idx_unirow)
    dot_cols = len(idx_unicol)
    for i in range(dot_rows):
        cur_row = idx_unirow[i]
        for j in range(dot_cols):
            cur_col = idx_unicol[j]
            value = img_dot[cur_row, cur_col]
            if value == 0:
                for x in range(0, width + 1):
                    for y in range(0, width + 1):
                        if (not (x == 0 and y == 0)
                            ) and cur_row + x < rows and cur_col + y < cols:
                            img_dot[cur_row + x, cur_col + y] = 1
    ''' 统一横纵坐标标准 '''
    # Build canonical row/column coordinates by merging coordinates that are
    # within 3 pixels of an already-accepted standard.
    idx = np.argwhere(img_dot == 0)  # coordinates of the dots
    idx_unirow = list(np.unique(idx[:, 0]))
    idx_unicol = list(np.unique(idx[:, 1]))
    row_standard = []
    for i in range(len(idx_unirow)):
        each_unique_row = idx_unirow[i]
        if i == 0:
            row_standard.append(each_unique_row)
            continue
        if each_unique_row not in row_standard:
            flag = True
            for each_standard in row_standard:
                difference = abs(each_unique_row - each_standard)
                if difference <= 3:
                    flag = False
                    break
            if flag:
                row_standard.append(each_unique_row)
    col_standard = []
    for i in range(len(idx_unicol)):
        each_unique_col = idx_unicol[i]
        if i == 0:
            col_standard.append(each_unique_col)
            continue
        if each_unique_col not in col_standard:
            flag = True
            for each_standard in col_standard:
                difference = abs(each_unique_col - each_standard)
                if difference <= 3:
                    flag = False
                    break
            if flag:
                col_standard.append(each_unique_col)
    ''' 对点的横纵坐标进行统一 '''
    # Snap every dot to the nearest canonical row/column coordinate.
    for i in range(len(idx)):
        each_dot = idx[i]
        for each_row_standard in row_standard:
            row = each_dot[0]
            row_difference = abs(each_row_standard - row)
            if row_difference <= 3:
                each_dot[0] = each_row_standard
                break
        for each_col_standard in col_standard:
            col = each_dot[1]
            col_difference = abs(each_col_standard - col)
            if col_difference <= 3:
                each_dot[1] = each_col_standard
                break
    idx = np.unique(idx, axis=0)
    # Rebuild the dot image from the snapped coordinates.
    img_dot = np.ones((rows, cols))
    for each_dot in idx:
        row = each_dot[0]
        col = each_dot[1]
        img_dot[row, col] = 0
    # Show the dot image.
    plt.imshow(img_dot, plt.cm.gray)
    plt.show()
    '''对线的横纵坐标进行补线'''
    # Snap horizontal lines onto the canonical row coordinates.
    row_idx = np.argwhere(img_rows == 0)
    for i in range(len(row_idx)):
        each_row_dot = row_idx[i]
        row = each_row_dot[0]
        for each_row_standard in row_standard:
            difference = abs(row - each_row_standard)
            if difference <= 3:
                col = each_row_dot[1]
                img_rows[each_row_standard, col] = 0
                break
    # Snap vertical lines onto the canonical column coordinates.
    col_idx = np.argwhere(img_cols == 0)
    for i in range(len(col_idx)):
        each_col_dot = col_idx[i]
        col = each_col_dot[1]
        for each_col_standard in col_standard:
            difference = abs(col - each_col_standard)
            if difference <= 3:
                row = each_col_dot[0]
                img_cols[row, each_col_standard] = 0
                break
    # Show the line image.
    img_line = img_cols * img_line
    plt.imshow(img_line, plt.cm.gray)
    plt.show()
    # Show the dot image.
    plt.imshow(img_dot, plt.cm.gray)
    plt.show()
    ''' 按黑线切分单元格 '''
    # Walk the grid corners: a cell is accepted when its four edges are
    # fully black (sum of the slice is 0) between the corner dots.
    idx = np.argwhere(img_dot == 0)  # coordinates of the dots
    idx_unirow = list(np.unique(idx[:, 0]))
    cell_items = []
    for i in range(len(idx_unirow) - 1):
        row_cur = idx_unirow[i]  # row coordinate of the current grid row
        idx_row_cur = idx[idx[:, 0] == row_cur]  # all dots on this row
        for j in range(len(idx_row_cur) - 1):  # try each dot as top-left corner
            # Check whether the top-left dot can start a cell:
            # its left edge must be continuous...
            col_cur = idx_row_cur[j][1]
            row_next_test_1 = idx_unirow[i + 1]
            black_line_test_1 = img_cols[row_cur:row_next_test_1, col_cur]
            if np.sum(black_line_test_1) != 0:
                continue
            # ...and its top edge must be continuous.
            col_next_text_2 = idx_row_cur[j + 1][1]
            black_line_test_2 = img_rows[row_cur, col_cur:col_next_text_2]
            if np.sum(black_line_test_2) != 0:
                continue
            for a in range(j + 1, len(idx_row_cur)):  # dots to the right on this row
                col_next = idx_row_cur[a][1]  # column of the candidate top-right dot
                idx_col_next = idx[idx[:, 1] == col_next]  # all dots in that column
                index = None
                length = len(idx_col_next)
                # Locate the top-right dot within its column.
                for b in range(length):
                    if idx_col_next[b][0] == row_cur and idx_col_next[b][
                            1] == col_next:
                        index = b
                        break
                # Check whether the top-right dot has a continuous right edge.
                if index + 1 < length:  # not the bottom-most dot of its column
                    row_next_test_3 = idx_col_next[index + 1][0]  # next dot's row below the top-right corner
                    black_line_test_3 = img_cols[row_cur:row_next_test_3,
                                                 col_next]
                else:
                    continue
                if np.sum(black_line_test_3) != 0:
                    continue
                for c in range(index + 1, length):
                    row_next = idx_col_next[c][0]  # row of the candidate bottom corner
                    left_line = img_cols[row_cur:row_next, col_cur]
                    right_line = img_cols[row_cur:row_next, col_next]
                    if np.sum(left_line) != 0 or np.sum(right_line) != 0:
                        break
                    down_line = img_rows[row_next, col_cur:col_next]
                    if np.sum(down_line) == 0:
                        # All four edges closed: record this cell.
                        sub_img = img_binary[row_cur:row_next,
                                             col_cur:col_next]
                        sub_item = Cell(sub_img, (row_cur, col_cur))
                        cell_items.append(sub_item)
                        break
                break
    '''更新切分'''
    # Re-cut cells that span multiple grid rows.
    cell_items_new = []
    for each_cell in cell_items:
        result = cut(each_cell, idx_unirow)
        if result:
            for each_sub_item in result:
                cell_items_new.append(each_sub_item)
        else:
            cell_items_new.append(each_cell)
    # Map cell top-left coordinates to DataFrame row/column indices.
    x = []
    y = []
    for each in cell_items_new:
        x.append(each.location[0])
        y.append(each.location[1])
    unique_x = np.unique(x)
    unique_y = np.unique(y)
    unique_x.sort()
    unique_y.sort()
    df = DataFrame(np.full((len(unique_x), len(unique_y)), np.nan))
    ocr = CnOcr(model_name='densenet-lite-fc', model_epoch=72,
                root='local_models')
    root = 'sub_cut_df'
    t = 0
    for each_cell in cell_items_new:
        a, b = each_cell.location
        c = a + each_cell.img.shape[0]
        d = b + each_cell.img.shape[1]
        t += 1
        img = img_gray[a:c, b:d]
        img = clean(img)  # remove black ruling lines
        img = clean_white_vertical(img)  # trim top/bottom white margins
        img = clean_white_horizontal(img)  # trim left/right white margins
        img *= 255
        row_index = np.argwhere(unique_x == each_cell.location[0])[0][0]
        col_index = np.argwhere(unique_y == each_cell.location[1])[0][0]
        # Save the cell crop (imencode/tofile handle non-ASCII paths).
        dir = os.path.join(
            root,
            str(t) + '_(' + str(row_index) + ', ' + str(col_index) + ')' +
            '[' + str(a) + ', ' + str(b) + ']' + '.jpg')
        io.imsave(dir, img)
        img = cv2.imdecode(np.fromfile(dir, dtype=np.uint8), 0)
        height, width = img.shape
        # Upscale 3x before OCR.
        img = np.array(cv2.resize(img, (3 * width, 3 * height)))
        cv2.imencode('.jpg', img)[1].tofile(dir)
        # NOTE(review): bare except hides all OCR failures behind NaN;
        # narrowing to Exception would be safer.
        try:
            value = ocr.ocr_for_single_line(img)
            value = ''.join(value)
        except:
            value = np.NAN
        print(value)
        df.iloc[row_index, col_index] = value
    df.to_excel('result_df.xlsx', index=False)
class picture(QWidget):
    """Small Qt widget: pick an image file, OCR it with CnStd+CnOcr, show
    the picture and the recognized text, and append the text to history.txt."""

    def __init__(self):
        super(picture, self).__init__()
        self.resize(350, 350)
        self.setWindowTitle("图片转文字")
        # Preview area for the chosen picture.
        self.label = QLabel(self)
        self.label.setScaledContents(True)
        self.label.setFixedSize(300, 200)
        self.label.move(25, 60)
        self.label.setStyleSheet(
            "QLabel{background:white;}"
            "QLabel{color:rgb(300,300,300,120);font-size:10px;font-weight:bold;font-family:宋体;}"
        )
        btn = QPushButton(self)
        btn.setText("打开图片")
        btn.move(135, 20)
        btn.clicked.connect(self.openimage)
        # Recognized-text label (selectable so the user can copy it).
        self.label_text = QLabel(self)
        self.label_text.setFixedSize(300, 30)
        self.label_text.move(25, 270)
        self.label_text.setTextInteractionFlags(
            Qt.TextSelectableByMouse)  # allow copying the text
        # Status/tip label at the bottom.
        self.label_wait = QLabel(self)
        self.label_wait.setFixedSize(300, 30)
        self.label_wait.move(25, 300)
        # Enable background fill, otherwise the background color won't show.
        self.label_wait.setAutoFillBackground(True)
        # Center the tip text.
        self.label_wait.setAlignment(Qt.AlignCenter)
        self.label_wait.setText('tips:识别过程可能会卡住,需几秒到几十秒不等')
        self.std = CnStd()
        self.cn_ocr = CnOcr()

    def openimage(self):
        """File-picker handler: OCR the chosen image, display picture and
        text, and append the recognized text to history.txt."""
        imgName, imgType = QFileDialog.getOpenFileName(
            self, "打开图片", "", "*.jpg;;*.png;;All Files(*)")
        if imgName and imgType:
            # Turn the status label green while processing.
            palette = QPalette()
            palette.setColor(QPalette.Window, Qt.green)
            self.label_wait.setPalette(palette)
            box_info_list = self.std.detect(imgName)
            result = ''
            for box_info in box_info_list:
                cropped_img = box_info['cropped_img']  # detected text box
                ocr_res = self.cn_ocr.ocr_for_single_line(cropped_img)
                result += ''.join(ocr_res)
            self.label_text.setText(result)
            self.label_wait.setText('↑点击文字,ctrl+a全选、ctrl+c复制、ctrl+v粘贴')
            jpg = QtGui.QPixmap(imgName).scaled(self.label.width(),
                                                self.label.height())
            self.label.setPixmap(jpg)
            with open('history.txt', 'a', encoding='utf8') as f:
                f.write(result + '\n')
    # NOTE(review): the two statements below are the tail of a function whose
    # definition starts before this chunk -- kept as-is.
    print('初始化失败,未找到预加载模型')
    return None


def has_chinese(word):
    """Return True if *word* contains at least one CJK unified ideograph."""
    for ch in word:
        if '\u4e00' <= ch <= '\u9fff':
            return True
    return False


if __name__ == '__main__':
    # Demo: detect text boxes in one image and OCR each box.
    ocr = CnOcr()
    std = CnStd()
    path = '/images/life.jpg'
    # imdecode + fromfile handles non-ASCII paths on Windows.
    img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.array(img)
    box_info_list = std.detect(img)
    res = ''
    for box_info in box_info_list:
        cropped_img = box_info['cropped_img']  # detected text box
        ocr_res = ocr.ocr_for_single_line(cropped_img)
        res += ''.join(ocr_res) + '\n'
    print("Predicted Chars:", res)
# Load an image via mxnet and recognize its single text line with cnocr.
import mxnet as mx
from cnocr import CnOcr

ocr = CnOcr()
img_fp = 'examples/1.png'
loaded = mx.image.imread(img_fp, 1)
res = ocr.ocr_for_single_line(loaded)
print("Predicted Chars:", res)
    # NOTE(review): the two statements below are the tail of a function whose
    # definition starts before this chunk -- kept as-is.
    text_list.append(''.join(res))
    return text_list


if __name__ == '__main__':
    # Interactive walkthrough of projection-based line segmentation + OCR:
    # each intermediate image is shown in an OpenCV window (press any key to
    # advance) and the recognized text is printed per line.
    ocr = CnOcr()
    image = cv2.imread(r'try.png', cv2.IMREAD_GRAYSCALE)
    print(image)
    print(image.shape)
    cv2.imshow('gray_image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    height_image, width_image = image.shape
    _, binary_image = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY_INV)
    cv2.imshow('binary_image', binary_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    height_projection = get_horizontal_projection(binary_image)
    text_lines = get_text_lines(height_projection)
    for line_index, text_line in enumerate(text_lines):
        text_line_image = binary_image[text_line[0]:text_line[1],
                                       0:width_image]
        vertical_projection = get_vertical_projection(text_line_image)
        text_words = get_text_word(vertical_projection)
        # Crop from the first word's left edge to the last word's right edge.
        text_line_word_image = image[text_line[0]:text_line[1],
                                     text_words[0][0]:text_words[-1][1]]
        cv2.imshow('text_line_word_image', text_line_word_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        res = ocr.ocr_for_single_line(text_line_word_image)
        print(''.join(res))