def invoice_ocr():
    """Handle an invoice upload: store it, pick an OCR pipeline, return the result.

    The uploaded file is read from ``request.files['file']`` and saved under
    ``temp_dir`` using a name derived from the original file name. A
    stamp-free (red-channel thresholded) copy is OCR'd to look for the
    keyword ``电子普通``; when it is found the ``ocr_E`` / ``invoice_e``
    pipeline is used, otherwise ``ocr_M`` / ``invoice_m``.

    Returns:
        Whatever ``build_api_result`` returns, called with one of these codes:

        * 101 - the request has no ``file`` part.
        * 102 - the file extension is not in ``allowed_extension``.
        * 100 - recognition succeeded; carries the result, the source file
          name, the stored file name and the recognition time
          (Asia/Shanghai).
        * 104 - nothing was recognised, or the upload could not be decoded
          as an image.
    """
    # Validate the request parameters.
    if 'file' not in request.files:
        return build_api_result(101, "请求参数错误", {}, {}, {}, {})

    # Read the request parameters.
    upload = request.files['file']
    # A missing file name is treated as empty so it is handled by the
    # extension check instead of raising AttributeError on ``None``.
    source_file_name = (upload.filename or "").strip('"')
    source_file_suffix = get_file_suffix(source_file_name).lower()

    # Check the file extension.
    if source_file_suffix not in allowed_extension:
        return build_api_result(102, "失败,文件格式问题", source_file_name, {}, {}, {})

    # NOTE(review): uuid3 is deterministic, so two uploads with the same
    # source file name share one stored file - confirm this is intended.
    invoice_file_name = "{}.{}".format(
        uuid.uuid3(uuid.NAMESPACE_URL, source_file_name), source_file_suffix)
    whole_path = os.path.join(temp_dir, invoice_file_name)
    upload.save(whole_path)

    # Build the stamp-free path the same way as the upload path so both
    # files land in the same directory even when ``temp_dir`` is absolute.
    nostamp_path = os.path.join(temp_dir, 'nostamp_{}'.format(invoice_file_name))

    # Decode the upload once; it is used for classification and for OCR.
    img = cv2.imread(whole_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals an unreadable image by returning None rather
        # than raising; report it as an empty recognition instead of crashing.
        return build_api_result(104, "识别为空!", {}, source_file_name,
                                invoice_file_name, {})

    def remove_stamp(image):
        """Write a binarised red-channel copy of *image* to ``nostamp_path``."""
        # cv2.split returns the channels in B, G, R order.
        _, _, red_channel = cv2.split(image)
        _, red_thresh = cv2.threshold(red_channel, 170, 255, cv2.THRESH_BINARY)
        cv2.imwrite(nostamp_path, red_thresh)

    def recognize_invoice_type(image):
        """Return 1 if any OCR text box contains ``电子普通``, otherwise 2."""
        remove_stamp(image)
        boxes = union_rbox(OCR(cv2.imread(nostamp_path)), 0.2)
        print(boxes)
        for box in boxes:
            # Drop every kind of whitespace so a keyword that OCR split
            # with spaces is still matched.
            text = "".join(box['text'].split())
            # Check every box: the keyword is not guaranteed to sit in the
            # first box returned by the OCR engine.
            if '电子普通' in text:
                return 1
        return 2

    if recognize_invoice_type(img) == 1:
        res = invoice_e(ocr_E(img)).res
    else:
        res = invoice_m(ocr_M(img)).res

    if len(res) > 0:
        tz = pytz.timezone('Asia/Shanghai')  # UTC+8
        ocr_identify_time = datetime.fromtimestamp(int(time.time()), tz)\
            .strftime('%Y-%m-%d %H:%M:%S')
        return build_api_result(100, "识别成功", res, source_file_name,
                                invoice_file_name, ocr_identify_time)
    return build_api_result(104, "识别为空!", {}, source_file_name,
                            invoice_file_name, {})
def invoice_ocr():
    """Handle an invoice upload: store it, pick an OCR pipeline, return the result.

    The uploaded file is read from ``request.files['file']`` and saved in
    the ``test`` directory under its own (sanitised) file name. A stamp-free
    (red-channel thresholded) copy is OCR'd to look for the keyword
    ``电子普通``; when it is found the ``ocr_E`` / ``invoice_e`` pipeline is
    used, otherwise ``ocr_M`` / ``invoice_m``. The elapsed processing time is
    printed.

    Returns:
        Whatever ``build_api_result`` returns, called with one of these codes:

        * 101 - the request has no ``file`` part.
        * 102 - the file name is empty or rejected by ``allowed_file``.
        * 100 - recognition succeeded; carries the result, the stored file
          name and the recognition time (Asia/Shanghai).
        * 104 - nothing was recognised, or the upload could not be decoded
          as an image.
    """
    start = time.time()

    # Validate the request parameters.
    if 'file' not in request.files:
        return build_api_result(101, "请求参数错误", {}, {}, {})

    # Read the request parameters.
    upload = request.files['file']
    # The file name is supplied by the client: keep only its last path
    # component so a name such as "../../x.jpg" cannot escape the upload
    # directory. Backslashes are normalised first to cover Windows clients.
    invoice_file_name = os.path.basename((upload.filename or "").replace("\\", "/"))

    # Check the file extension.
    if invoice_file_name in ("", ".", "..") or not allowed_file(invoice_file_name):
        return build_api_result(102, "失败,文件格式问题", {}, {}, {})

    upload_path = "test"
    whole_path = os.path.join(upload_path, invoice_file_name)
    upload.save(whole_path)
    red_thresh_path = os.path.join(
        upload_path, 'RedThresh_{}.jpg'.format(invoice_file_name))

    # Decode the upload once; it is used for classification and for OCR.
    img = cv2.imread(whole_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals an unreadable image by returning None rather
        # than raising; report it as an empty recognition instead of crashing.
        return build_api_result(104, "识别为空!", {}, {}, {})

    def remove_stamp(image):
        """Write a binarised red-channel copy of *image* to ``red_thresh_path``."""
        # cv2.split returns the channels in B, G, R order.
        _, _, red_channel = cv2.split(image)
        # 255 is the largest value an 8-bit pixel can hold; the previous
        # max value of 355 was outside that range.
        _, red_thresh = cv2.threshold(red_channel, 170, 255, cv2.THRESH_BINARY)
        cv2.imwrite(red_thresh_path, red_thresh)

    def recognize_invoice_type(image):
        """Return 1 if any OCR text box contains ``电子普通``, otherwise 2."""
        remove_stamp(image)
        boxes = union_rbox(OCR(cv2.imread(red_thresh_path)), 0.2)
        print(boxes)
        for box in boxes:
            # Drop every kind of whitespace so a keyword that OCR split
            # with spaces is still matched.
            text = "".join(box['text'].split())
            # Check every box: the keyword is not guaranteed to sit in the
            # first box returned by the OCR engine.
            if '电子普通' in text:
                return 1
        return 2

    # Keep the result under its own name instead of rebinding the helper.
    invoice_type = recognize_invoice_type(img)
    if invoice_type == 1:
        res = invoice_e(ocr_E(img)).res
    else:
        res = invoice_m(ocr_M(img)).res

    end = time.time()
    print(f"Used {end - start}s")

    if len(res) > 0:
        tz = pytz.timezone('Asia/Shanghai')  # UTC+8
        ocr_identify_time = datetime.fromtimestamp(int(time.time()), tz)\
            .strftime('%Y-%m-%d %H:%M:%S')
        return build_api_result(100, "识别成功", res,
                                invoice_file_name, ocr_identify_time)
    return build_api_result(104, "识别为空!", {}, {}, {})