def general_ocr():
    """Handle a general-OCR web request.

    Reads a base64-encoded image (form field ``image``) and a
    direction-detection flag (form field ``detect_direction``), runs text
    detection + recognition, and returns the text boxes as JSON.

    :return: JSON string ``{'res': [...], 'timeTake': seconds}``.
    """
    upload_image = request.form['image']
    # Form values arrive as strings; interpret the usual truthy spellings so
    # that the literal string 'false' does not silently enable angle
    # detection (any non-empty string is truthy in Python).
    detect_direction = request.form['detect_direction']
    if isinstance(detect_direction, str):
        detect_direction = detect_direction.strip().lower() in ('1', 'true', 'yes', 'on')

    image = base64.b64decode(upload_image)
    # np.frombuffer replaces the deprecated np.fromstring for binary data.
    image = np.frombuffer(image, np.uint8)
    image = io.BytesIO(image)
    image = Image.open(image).convert('RGB')
    image = np.array(image)

    timeTake = time.time()
    _, result, angle = model.model(
        image,
        detectAngle=detect_direction,  # whether to detect text orientation (web parameter)
        config=dict(
            MAX_HORIZONTAL_GAP=200,  # max gap between characters when merging text lines
            MIN_V_OVERLAPS=0.6,
            MIN_SIZE_SIM=0.6,
            TEXT_PROPOSALS_MIN_SCORE=0.2,  # probability that a proposal is text
            TEXT_PROPOSALS_NMS_THRESH=0.3,
            TEXT_LINE_NMS_THRESH=0.99,  # IoU threshold between text lines
        ),
        leftAdjust=True,   # extend detected text lines to the left
        rightAdjust=True,  # extend detected text lines to the right
        alph=0.1,          # extension factor for the left/right adjustment
    )
    result = union_rbox(result, 0.2)
    res = [{'text': x['text'],
            'name': str(i),
            'box': {'cx': x['cx'], 'cy': x['cy'],
                    'w': x['w'], 'h': x['h'],
                    'angle': x['degree']}}
           for i, x in enumerate(result)]
    res = adjust_box_to_origin(image, angle, res)  # map boxes back onto the original image
    timeTake = time.time() - timeTake
    print({'res': res, 'timeTake': round(timeTake, 4)})
    return json.dumps({'res': res, 'timeTake': round(timeTake, 4)},
                      ensure_ascii=False)
def format_text(self, textbox, img, angle, billModel='general_OCR', CommandID=''):
    """Format text extracted from various document images.

    :param textbox: detected text boxes (coordinates + text content)
    :param img: the original image
    :param angle: the rotation angle applied to the original image
    :param billModel: document type, used to pick the formatter
    :param CommandID: non-empty for server-to-server requests (returns a
                      dict); empty for web-page display (returns a list)
    :return: res, the formatted result (dict or list depending on CommandID)
    """

    def _keyed(parsed):
        # Every field-extracting parser (train ticket, id card, invoice,
        # bank card) shares the same post-processing: a plain dict for
        # server calls, a list of entries for web display.
        fields = parsed.res
        if CommandID != '':
            return {key: fields[key] for key in fields}
        return [{'text': fields[key], 'name': key, 'box': {}} for key in fields]

    if billModel == '' or billModel == 'general_OCR':
        result = union_rbox(textbox, 0.2)
        res = [{'text': x['text'],
                'name': str(i),
                'box': {'cx': x['cx'], 'cy': x['cy'],
                        'w': x['w'], 'h': x['h'],
                        'angle': x['degree']}}
               for i, x in enumerate(result)]
        res = adjust_box_to_origin(img, angle, res)  # map boxes back onto the original image
    elif billModel == 'trainticket':
        res = _keyed(trainTicket.trainTicket(textbox))
    elif billModel == 'idcard':
        res = _keyed(idcard.idcard(textbox))
    elif billModel == 'invoice':
        res = _keyed(invoice.invoice(textbox))
    elif billModel == 'bankcard':
        res = _keyed(bankcard.bankcard(textbox))
    elif billModel == 'licenseplate':
        if CommandID != '':
            res = {'carNo': list(textbox), 'picUrl': '', 'picName': ''}
        else:
            res = [{'text': text, 'name': 'carNo', 'box': {}}
                   for text in list(textbox)]
    else:
        # NOTE(review): an unknown billModel previously left `res` unbound
        # and the return raised NameError; return an empty result instead.
        res = []
    return res
def getTextList(self, img, angle, result):
    """Concatenate the recognised text of all merged boxes into one string.

    Each text fragment is followed by a single space (including the last
    one), preserving the original accumulation behaviour.
    """
    merged = union_rbox(result, 0.2)
    boxes = [{'text': entry['text'],
              'name': str(idx),
              'box': {'cx': entry['cx'],
                      'cy': entry['cy'],
                      'w': entry['w'],
                      'h': entry['h'],
                      'angle': entry['degree']}}
             for idx, entry in enumerate(merged)]
    # Map the boxes back onto the (un-rotated) original image.
    boxes = adjust_box_to_origin(np.copy(img), angle, boxes)
    return ''.join(item['text'] + ' ' for item in boxes)
def find_word(imgpath):
    """OCR the image at *imgpath*, dump the recognised lines to
    ``result/<name>.txt``, then delete the source image.
    """
    time.sleep(0.5)  # presumably waits for the uploader to finish writing -- TODO confirm
    img = cv2.imread(imgpath)
    _, result, angle = model.model(
        img,
        detectAngle=True,
        config=dict(MAX_HORIZONTAL_GAP=50,  # max gap between characters when merging lines
                    MIN_V_OVERLAPS=0.6,
                    MIN_SIZE_SIM=0.6,
                    TEXT_PROPOSALS_MIN_SCORE=0.1,
                    TEXT_PROPOSALS_NMS_THRESH=0.3,
                    TEXT_LINE_NMS_THRESH=0.7),
        leftAdjust=True,
        rightAdjust=True,
        alph=0.01)
    result = union_rbox(result, 0.2)
    res = [{'text': x['text'],
            'name': str(i),
            'box': {'cx': x['cx'], 'cy': x['cy'],
                    'w': x['w'], 'h': x['h'],
                    'angle': x['degree']}}
           for i, x in enumerate(result)]
    res = adjust_box_to_origin(img, angle, res)  # map boxes back onto the original image
    # Derive the output name from the basename; the old
    # imgpath.split('/')[1] only worked for single-level relative paths.
    base = os.path.splitext(os.path.basename(imgpath))[0]
    txtpath = os.path.join('result', base + '.txt')
    print(res)
    # `with` closes the file; the original's extra f.close() inside the
    # block was redundant.
    with open(txtpath, 'w') as f:
        for n in res:
            f.write(n['text'])
            f.write('\n')
    if os.path.exists(imgpath):
        os.remove(imgpath)
def POST(self):
    """web.py handler: OCR a base64-encoded image posted as JSON.

    A lock file serialises model access across worker processes; each
    request waits (up to TIMEOUT seconds) until it can acquire the lock.

    :return: JSON string ``{'res': [...], 'timeTake': seconds}``.
    """
    t = time.time()
    data = web.data()
    uidJob = uuid.uuid1().__str__()
    data = json.loads(data)
    billModel = data.get('billModel', '')
    textAngle = data.get('textAngle', False)  # whether to detect text orientation
    textLine = data.get('textLine', False)    # single-line recognition only
    imgString = data['imgString'].encode().split(b';base64,')[-1]
    img = base64_to_PIL(imgString)
    if img is not None:
        img = np.array(img)
    H, W = img.shape[:2]
    # NOTE(review): if the lock is never acquired before TIMEOUT, `res`
    # used to be unbound and the final json.dumps raised NameError;
    # default to an empty result instead.
    res = []
    while time.time() - t <= TIMEOUT:
        if os.path.exists(filelock):
            # Another request holds the model; back off briefly instead of
            # busy-spinning at 100% CPU.
            time.sleep(0.01)
            continue
        else:
            # NOTE(review): exists()+open() is not atomic -- two processes
            # can still race here; confirm acceptable for this deployment.
            with open(filelock, 'w') as f:
                f.write(uidJob)
            if textLine:
                # Single-line recognition: feed the whole image to the CRNN.
                partImg = Image.fromarray(img)
                text = crnn.predict(partImg.convert('L'))
                res = [{'text': text,
                        'name': '0',
                        'box': [0, 0, W, 0, W, H, 0, H]}]
                os.remove(filelock)
                break
            else:
                detectAngle = textAngle
                result, angle = model.model(
                    img,
                    scale=scale,
                    maxScale=maxScale,
                    detectAngle=detectAngle,  # whether to detect text orientation (web parameter)
                    MAX_HORIZONTAL_GAP=100,   # max gap between characters when merging text lines
                    MIN_V_OVERLAPS=0.6,
                    MIN_SIZE_SIM=0.6,
                    TEXT_PROPOSALS_MIN_SCORE=0.1,
                    TEXT_PROPOSALS_NMS_THRESH=0.3,
                    TEXT_LINE_NMS_THRESH=0.99,  # IoU threshold between text lines
                    LINE_MIN_SCORE=0.1,
                    leftAdjustAlph=0.01,   # extend detected text lines to the left
                    rightAdjustAlph=0.01,  # extend detected text lines to the right
                )
                if billModel == '' or billModel == '通用OCR':
                    result = union_rbox(result, 0.2)
                    res = [{'text': x['text'],
                            'name': str(i),
                            'box': {'cx': x['cx'], 'cy': x['cy'],
                                    'w': x['w'], 'h': x['h'],
                                    'angle': x['degree']}}
                           for i, x in enumerate(result)]
                    res = adjust_box_to_origin(img, angle, res)  # map boxes back onto the original image
                elif billModel == '火车票':
                    res = trainTicket.trainTicket(result)
                    res = res.res
                    res = [{'text': res[key], 'name': key, 'box': {}}
                           for key in res]
                elif billModel == '身份证':
                    res = idcard.idcard(result)
                    res = res.res
                    res = [{'text': res[key], 'name': key, 'box': {}}
                           for key in res]
                os.remove(filelock)
                break
    timeTake = time.time() - t
    return json.dumps({'res': res, 'timeTake': round(timeTake, 4)},
                      ensure_ascii=False)
def POST(self):
    """web.py handler: OCR a base64-encoded image posted as JSON.

    Decodes the image to a temporary file under ``test/``, runs either
    single-line CRNN recognition or full detection + recognition, formats
    the result per ``billModel``, removes the temp file, and returns JSON
    ``{'res': [...], 'timeTake': seconds}``.
    """
    data = web.data()
    data = json.loads(data)
    billModel = data.get('billModel', '')
    textAngle = data.get('textAngle', False)  # whether to detect text orientation
    textLine = data.get('textLine', False)    # single-line recognition only
    # Strip any data-URL prefix before base64-decoding.
    imgString = data['imgString'].encode().split(b';base64,')[-1]
    imgString = base64.b64decode(imgString)
    # Round-trip through a uniquely-named temp file so cv2 can read it.
    jobid = uuid.uuid1().__str__()
    path = 'test/{}.jpg'.format(jobid)
    with open(path, 'wb') as f:
        f.write(imgString)
    img = cv2.imread(path)  # BGR
    H, W = img.shape[:2]
    timeTake = time.time()
    if textLine:  # single-line recognition: feed the whole image to the CRNN
        partImg = Image.fromarray(img)
        text = model.crnnOcr(partImg.convert('L'))
        res = [{
            'text': text,
            'name': '0',
            'box': [0, 0, W, 0, W, H, 0, H]
        }]
    else:
        detectAngle = textAngle  # whether to detect text orientation
        # NOTE(review): res stays unbound if billModel matches none of the
        # branches below -- the return would raise NameError; confirm callers
        # only send the listed values.
        _, result, angle = model.model(
            img,
            file_name="test.jpg",
            detectAngle=detectAngle,  # whether to detect text orientation (web parameter)
            config=dict(
                MAX_HORIZONTAL_GAP=50,  # max gap between characters when merging text lines
                MIN_V_OVERLAPS=0.6,
                MIN_SIZE_SIM=0.6,
                TEXT_PROPOSALS_MIN_SCORE=0.1,
                TEXT_PROPOSALS_NMS_THRESH=0.3,
                TEXT_LINE_NMS_THRESH=0.7),  # IoU threshold between text lines
            leftAdjust=True,   # extend detected text lines to the left
            rightAdjust=True,  # extend detected text lines to the right
            alpha=0.01)        # extension factor (this model variant spells it `alpha`)
        print('[POST] result', result)
        print('[POST] angle', angle)
        if billModel == '' or billModel == '通用OCR':
            result = union_rbox(result, 0.2)
            res = [{
                'text': x['text'],
                'name': str(i),
                'box': {
                    'cx': x['cx'],
                    'cy': x['cy'],
                    'w': x['w'],
                    'h': x['h'],
                    'angle': x['degree']
                }
            } for i, x in enumerate(result)]
            res = adjust_box_to_origin(img, angle, res)  # map boxes back onto the original image
        elif billModel == '火车票':
            res = trainTicket.trainTicket(result)
            res = res.res
            res = [{
                'text': res[key],
                'name': key,
                'box': {}
            } for key in res]
        elif billModel == '身份证':
            res = idcard.idcard(result)
            res = res.res
            res = [{
                'text': res[key],
                'name': key,
                'box': {}
            } for key in res]
    timeTake = time.time() - timeTake
    os.remove(path)  # clean up the temp image (not reached if OCR raised)
    return json.dumps({
        'res': res,
        'timeTake': round(timeTake, 4)
    }, ensure_ascii=False)
def getWordRecognition(self, img_file, bill_model):
    """OCR an image file and format the result according to *bill_model*.

    :param img_file: path of the image to recognise (read as BGR via cv2)
    :param bill_model: document type ('通用OCR', '身份证', ...) selecting
                       the formatter
    :return: dict ``{'res': formatted result, 'timeTake': seconds}``
    """
    billModel = bill_model
    textAngle = True   # run text-orientation detection
    textLine = False   # full-page mode (single-line recognition disabled)
    img = cv2.imread(img_file)  # BGR
    H, W = img.shape[:2]
    timeTake = time.time()

    def _keyed(parsed):
        # Shared post-processing for every field-extracting parser
        # (id card, driving licence, ...): one entry per parsed field.
        fields = parsed.res
        return [{'text': fields[key], 'name': key, 'box': {}} for key in fields]

    def _boxed(raw):
        # Shared post-processing for plain-text models: merge detections
        # into lines and map the boxes back onto the original image.
        merged = union_rbox(raw, 0.2)
        entries = [{'text': x['text'],
                    'name': str(i),
                    'box': {'cx': x['cx'], 'cy': x['cy'],
                            'w': x['w'], 'h': x['h'],
                            'angle': x['degree']}}
                   for i, x in enumerate(merged)]
        return adjust_box_to_origin(img, angle, entries)

    if textLine:
        # Single-line recognition: feed the whole image to the CRNN.
        partImg = Image.fromarray(img)
        text = model.crnnOcr(partImg.convert('L'))
        res = [{'text': text, 'name': '0', 'box': [0, 0, W, 0, W, H, 0, H]}]
    else:
        detectAngle = textAngle
        _, result, angle = model(
            img,
            detectAngle=detectAngle,  # whether to detect text orientation
            config=dict(
                MAX_HORIZONTAL_GAP=50,  # max gap between characters when merging text lines
                MIN_V_OVERLAPS=0.6,
                MIN_SIZE_SIM=0.6,
                TEXT_PROPOSALS_MIN_SCORE=0.1,
                TEXT_PROPOSALS_NMS_THRESH=0.3,
                TEXT_LINE_NMS_THRESH=0.7,  # IoU threshold between text lines
            ),
            leftAdjust=True,   # extend detected text lines to the left
            rightAdjust=True,  # extend detected text lines to the right
            alph=0.01,         # extension factor for the left/right adjustment
        )
        if billModel == '' or billModel == '通用OCR':
            res = _boxed(result)
        elif billModel == '身份证':
            res = _keyed(idcard.idcard(result))
        elif billModel == '驾驶证':
            res = _keyed(drivinglicense.drivinglicense(result))
        elif billModel == '行驶证':
            res = _keyed(vehiclelicense.vehiclelicense(result))
        elif billModel == '银行卡':
            res = _keyed(bankcard.bankcard(result))
        elif billModel == '手写体':
            res = _boxed(result)
        elif billModel == '车牌':
            res = _keyed(vehicleplate.vehicleplate(result))
        else:
            # NOTE(review): an unknown billModel previously left `res`
            # unbound and the return raised NameError; return [] instead.
            res = []
    timeTake = time.time() - timeTake
    return {'res': res, 'timeTake': round(timeTake, 4)}
def getWordRecognition(self, img_file, bill_model):
    """OCR an image file and format the result according to *bill_model*.

    Besides the per-field result ``res``, this variant also produces:
      * ``text``    -- all recognised text joined into one string,
      * ``com_res`` -- the raw line boxes mapped back onto the image,
      * an image copy with boxes drawn, saved next to the source file and
        exposed through ``drawUrl``.

    :return: dict ``{'res', 'timeTake', 'text', 'com_res', 'drawUrl'}``
    """
    billModel = bill_model
    textAngle = True   # run text-orientation detection
    textLine = False   # full-page mode (single-line recognition disabled)
    text = ''
    file_name = os.path.basename(img_file)
    file_path = os.path.dirname(img_file)
    # setExif loads the image and applies EXIF orientation when present.
    img, is_exif, H, W = self.setExif(img_file)
    if is_exif is True:
        textAngle = False  # EXIF already fixed the orientation
    timeTake = time.time()
    # Bug fix: `com_res` was never set on the textLine path, in the 手写体
    # branch, or for unknown billModel values, so the box-drawing code
    # below raised NameError; default both results to [].
    res = []
    com_res = []

    def _keyed(parsed):
        # Shared post-processing for field-extracting parsers:
        # one entry per parsed field.
        fields = parsed.res
        return [{'text': fields[key], 'name': key, 'box': {}} for key in fields]

    def _boxed(raw):
        # Merge raw detections into lines and map the boxes back onto the
        # original image.
        merged = union_rbox(raw, 0.2)
        entries = [{'text': x['text'],
                    'name': str(i),
                    'box': {'cx': x['cx'], 'cy': x['cy'],
                            'w': x['w'], 'h': x['h'],
                            'angle': x['degree']}}
                   for i, x in enumerate(merged)]
        return adjust_box_to_origin(np.copy(img), angle, entries)

    if textLine:
        # Single-line recognition: feed the whole image to the CRNN.
        partImg = Image.fromarray(img)
        text = model.crnnOcr(partImg.convert('L'))
        res = [{'text': text, 'name': '0', 'box': [0, 0, W, 0, W, H, 0, H]}]
    else:
        detectAngle = textAngle
        _, result, angle = model(
            img,
            detectAngle=detectAngle,  # whether to detect text orientation
            config=dict(
                MAX_HORIZONTAL_GAP=50,  # max gap between characters when merging text lines
                MIN_V_OVERLAPS=0.6,
                MIN_SIZE_SIM=0.6,
                TEXT_PROPOSALS_MIN_SCORE=0.1,
                TEXT_PROPOSALS_NMS_THRESH=0.3,
                TEXT_LINE_NMS_THRESH=0.7,  # IoU threshold between text lines
            ),
            leftAdjust=True,   # extend detected text lines to the left
            rightAdjust=True,  # extend detected text lines to the right
            alph=0.01,         # extension factor for the left/right adjustment
        )
        # All field-extracting document types share identical handling.
        parsers = {
            '身份证': idcard.idcard,
            '驾驶证': drivinglicense.drivinglicense,
            '行驶证': vehiclelicense.vehiclelicense,
            '营业执照': businesslicense.businesslicense,
            '银行卡': bankcard.bankcard,
            '车牌': vehicleplate.vehicleplate,
            '名片': businesscard.businesscard,
        }
        if billModel == '' or billModel == '通用OCR':
            # getTextList must see the raw (pre-union) detections, matching
            # the original call order.
            text = self.getTextList(img, angle, result)
            res = _boxed(result)
            com_res = res
        elif billModel == '手写体':
            # This branch merges first and then builds boxes from the
            # already-merged result (original behaviour preserved).
            result = union_rbox(result, 0.2)
            res = [{'text': x['text'],
                    'name': str(i),
                    'box': {'cx': x['cx'], 'cy': x['cy'],
                            'w': x['w'], 'h': x['h'],
                            'angle': x['degree']}}
                   for i, x in enumerate(result)]
            res = adjust_box_to_origin(np.copy(img), angle, res)  # map boxes back onto the original image
            text = self.getTextList(img, angle, result)
            # Bug fix: previously unset here, crashing at the draw step;
            # mirror the 通用OCR branch.
            com_res = res
        elif billModel in parsers:
            res = _keyed(parsers[billModel](result))
            text = self.getTextList(img, angle, result)
            com_res = _boxed(result)
    timeTake = time.time() - timeTake
    # Draw the detected boxes onto a copy of the original image.
    drawBoxes = []
    draw_filename = file_name.split('.')[0] + '_drawed.' + file_name.split('.')[1]
    drawPath = os.path.join(file_path, draw_filename)
    drawUrl = settings.FILE_URL + settings.MEDIA_URL + 'photos' + '/' + draw_filename
    if len(com_res) > 0:
        for arr in com_res:
            drawBoxes.append(arr["box"])
        drawImg = draw_boxes(np.array(img), drawBoxes)
        cv2.imwrite(drawPath, drawImg)
    return {'res': res, 'timeTake': round(timeTake, 4), 'text': text,
            'com_res': com_res, 'drawUrl': drawUrl}
def currency_invoice(img, arg, arg2, model_flg):
    """Detect and recognise text on an invoice image.

    Two pipelines are selected by *model_flg*: a CRAFT-based detector
    (further tuned by the free variable ``lab``), or a PSENet-based one
    that first deskews the image via a Hough-line estimate.

    :param img: input image (BGR array)
    :param arg: parameter tuple for the CRAFT pipeline (indexed positionally)
    :param arg2: parameter tuple for the PSENet pipeline
    :param model_flg: True selects the CRAFT pipeline, otherwise PSENet
    :return: (res, im_show) -- formatted boxes and a visualisation image
    """
    from main import TextOcrModel
    angle_detect = None
    model = TextOcrModel(ocr, text_detect, angle_detect)
    # print(arg[12],'---------------------',type(arg[12]))
    # NOTE(review): `lab`, `ocr`, `text_detect`, `text_detect_p` and
    # `color_filter` are free variables resolved at module level -- confirm
    # they are defined wherever this function is used.
    if model_flg == True:
        model = TextOcrModel(ocr, text_detect, angle_detect)
        # Two-stage colour filtering before detection.
        rotate_img = color_filter(img, color_thre=255, mode='less')
        img = color_filter(rotate_img, color_thre=50, mode='more')
        if lab == '-1':
            result, angle, im_show = model.model_CRAFT(
                img,
                scale=arg[0],
                detectAngle=False,  # whether to detect text orientation
                MAX_HORIZONTAL_GAP=arg[2],  # max gap between characters  # 30
                MIN_V_OVERLAPS=arg[3],  # 0.5
                TEXT_PROPOSALS_MIN_SCORE=arg[4],  # [0.5, 0.2, 0.7]
                leftAdjustAlph=arg[6],
                rightAdjustAlph=arg[7],
                Adjustbox=arg[5],
                pixel_filter=arg[8],
                batch_by_1=arg[9],
                scoremap_enhance_pixel=arg[10],
            )
        elif lab == '0':
            result, angle, im_show = model.model_CRAFT(
                img,
                scale=arg[0],
                detectAngle=False,  # whether to detect text orientation
                MAX_HORIZONTAL_GAP=arg[2],  # max gap between characters  # 30
                MIN_V_OVERLAPS=arg[3],  # 0.5
                TEXT_PROPOSALS_MIN_SCORE=arg[4],  # [0.5, 0.2, 0.7]
                leftAdjustAlph=arg[6],
                rightAdjustAlph=arg[7],
                Adjustbox=arg[5],
                pixel_filter=arg[8],  # first value filters width, second filters height
                batch_by_1=arg[9],
                scoremap_enhance_pixel=arg[10],
            )
        # ti2 = time.asctime(time.localtime(time.time()))
        # print("local time:", ti2)
    else:
        model = TextOcrModel(ocr, text_detect_p, angle_detect)
        rotate_img = color_filter(img, color_thre=255, mode='less')
        img = color_filter(rotate_img, color_thre=50, mode='more')
        # ------------------------ deskew ---------------------------
        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        edges = cv.Canny(gray, 50, 150, apertureSize=3)
        # Hough transform; NOTE(review): cv.HoughLines can return None when
        # no line is found, which would raise here -- confirm inputs always
        # contain edges.
        lines = cv.HoughLines(edges, 1, np.pi / 180, 0)
        for rho, theta in lines[0]:
            a = np.cos(theta)
            b = np.sin(theta)
            x0 = a * rho
            y0 = b * rho
            # Two far-apart points on the detected line.
            x1 = int(x0 + 1000 * (-b))
            y1 = int(y0 + 1000 * (a))
            x2 = int(x0 - 1000 * (-b))
            y2 = int(y0 - 1000 * (a))
            # print(x1, '---------', x2, '---------', x2, '---------', y2)
            if x1 == x2 or y1 == y2 or y2 == -1000:
                # Line is axis-aligned (or degenerate): no rotation needed.
                rotate_img = img
            else:
                # Rotate by the line's slope angle, folded into (-45, 45].
                t = float(y2 - y1) / (x2 - x1)
                rotate_angle = math.degrees(math.atan(t))
                if rotate_angle > 45:
                    rotate_angle = -90 + rotate_angle
                elif rotate_angle < -45:
                    rotate_angle = 90 + rotate_angle
                rotate_img = ndimage.rotate(img, rotate_angle)
        result, angle, im_show = model.model_PSENET(
            rotate_img,
            scale=arg2[0],
            maxScale=arg2[1],
            detectAngle=arg2[2],  # whether to detect text orientation
            MAX_HORIZONTAL_GAP=arg2[3],  # max gap between characters
            MIN_V_OVERLAPS=arg2[4],
            TEXT_PROPOSALS_MIN_SCORE=arg2[5],
            leftAdjustAlph=arg2[6],
            rightAdjustAlph=arg2[7],
            Adjustbox=arg2[8],
        )
    # result = union_rbox(result, 0.2)
    res = [{
        'text': x['text'],
        'name': str(i),
        'box': {
            'cx': x['cx'],
            'cy': x['cy'],
            'w': x['w'],
            'h': x['h'],
            'angle': x['degree']
        }
    } for i, x in enumerate(result)]
    res = adjust_box_to_origin(img, angle, res)  # map boxes back onto the original image
    return res, im_show