def DetectVATInvoice(box, symbol, filePath):
    """Crop and OCR every field described by the VAT invoice XML template.

    Loads ``ModeLabel_00001.xml`` through ``xmlToDict.XmlTodict``, collects
    four reference values from its ``QRCode`` and ``figureX`` entries, and
    then, for every other template entry, crops the field with
    ``MakeFileInV`` and prints ``"<field name>: <OcrPic result>"``.

    Args:
        box: Forwarded unchanged to ``MakeFileInV``.
        symbol: Forwarded unchanged to ``MakeFileInV``.
        filePath: Forwarded unchanged to ``MakeFileInV``.

    Returns:
        None. The recognised text is only printed.
    """
    # Recognise VAT invoice (type 1): the template is anchored on the QR code
    # and on the "x" figure mark.
    dic = xmlToDict.XmlTodict('ModeLabel_00001.xml')
    tplt = [
        dic['QRCode'][0],
        dic['QRCode'][1],
        dic['figureX'][2],
        dic['figureX'][3],
    ]
    for c in tplt:
        # A missing anchor value is only reported; processing continues.
        if c is None:
            print('Templet VATInvoice error')

    for item in dic:
        if item != 'QRCode' and item != 'figureX':
            # Look the entry up once instead of four times.
            field = dic.get(item)
            # The four template values are passed as two integer pairs
            # (presumably two corner points / origin and size - TODO confirm
            # against MakeFileInV).
            tmp = MakeFileInV(
                [[int(field[0]), int(field[1])],
                 [int(field[2]), int(field[3])]],
                box, symbol, filePath, item, tplt)
            print(item + ": " + OcrPic(tmp))

    js = InterfaceType.JsonInterface.invoice()
    # NOTE(review): this is a bare attribute access - the method is never
    # called, so nothing is added to ``js``. The intended arguments are not
    # visible here, so the statement is left as it was.
    js.addVATInvoiceInfo
def mubanDetectInvoiceDate(filepath, setKey='invoiceDate'):
    """Locate one template field of a VAT special invoice on an image.

    The field rectangle for ``setKey`` is read from the XML template,
    adjusted with ``simplyAdjust`` using the symbol position reported by
    ``FindCircle.findSymbol``, and then matched against the text lines of the
    image via ``lineToAttribute.getAtbt.compute``.

    Args:
        filepath: Path of the invoice image.
        setKey: Name of the template entry to locate.

    Returns:
        The value returned by ``lineToAttribute.getAtbt.compute``, or ``None``
        when the image cannot be read or ``FindCircle.findSymbol`` returns
        ``None``.
    """
    dic = xmlToDict.XmlTodict('/home/huangzheng/ocr/VATInvoiceSimpleMuban.xml')

    # Reference point derived from the figureX entry: value[0] + value[2] / 2
    # and value[1] + value[3] / 2 (presumably the centre of an x, y, w, h
    # rectangle - TODO confirm against the template format).
    figure_x = dic['figureX']
    tplt = [
        figure_x[0] + figure_x[2] / 2,
        figure_x[1] + figure_x[3] / 2,
    ]

    # Template restricted to the single requested field, values cast to int.
    field = dic.get(setKey)
    TemType = {setKey: [int(field[i]) for i in range(4)]}

    fcv = cv2.imread(filepath, 1)
    if fcv is None:
        # Previously a bare ``except`` printed this message and execution then
        # failed on the unbound ``w1``; bail out like the missing-symbol case.
        print("picture is None")
        return None
    w1 = fcv.shape

    figureP = FindCircle.findSymbol(filepath)
    if figureP is None:
        return None

    # VAT special invoice: shift the template using the detected symbol.
    Templet = simplyAdjust(TemType, [figureP[0], figureP[1]], tplt, w1)
    attributeLine = lineToAttribute.getAtbt.compute(textline(filepath), Templet)
    return attributeLine
def init(filepath, type):
    """Run the whole recognition pipeline on one ticket / invoice image.

    The image is classified (or preprocessed for the given ``type``), the
    matching field template is selected, text boxes are detected and used to
    adjust the template, a debug figure is shown, and finally the text lines
    are mapped to fields and passed to ``flow.cropToOcr``.

    The intermediate result ``midProcessResult`` is indexed as
    ``[0]`` image path, ``[1]`` ticket type code (1 blue train ticket, 2 red
    train ticket, 3 excess-fare ticket, 11 VAT special invoice) and
    ``[2]`` text lines.

    Args:
        filepath: Path of the image to process.
        type: ``None`` to classify with ``detectType.detectType``; ``'blue'``
            or ``'excess'`` to preprocess with ``PipeInvoice.getPipe``; any
            other value is handled as a red ticket on the unprocessed image.
            (The name shadows the builtin but is kept for callers.)

    Returns:
        The value returned by ``flow.cropToOcr``; it is also printed.

    Raises:
        ValueError: If the ticket type code is not one of 1, 2, 3 or 11.
    """
    if type is None:
        # Unclassified input: preprocessing decides the ticket type.
        midProcessResult = detectType.detectType(filepath)
    elif type == 'blue' or type == 'excess':
        midProcessResult = PipeInvoice.getPipe(filepath, type, False)
    else:
        # Any other value is handled as type == 'red': code 2, with text
        # lines extracted from the unprocessed image.
        # NOTE(review): the text-line extraction is assumed to belong to this
        # branch only - confirm against the original layout.
        midProcessResult = [filepath, 2, textline(filepath)]

    # Field templates, four integers per field (presumably x, y, width,
    # height - TODO confirm against adjustToTextLine).
    blueTemplet = {
        'departCity': [48, 62, 222, 56],
        'arriveCity': [412, 61, 228, 55],
        'trainNumber': [264, 62, 170, 57],
        'invoiceDate': [24, 139, 369, 42],
        'seatNum': [408, 138, 160, 40],
        'idNum': [22, 276, 306, 38],
        'passenger': [328, 276, 150, 38],
        'totalAmount': [33, 177, 151, 39],
        'ticketsNum': [21, 10, 195, 66],
    }
    redTemplet = {
        'idNum': [66, 242, 357, 38],
        'departCity': [66, 66, 222, 45],
        'arriveCity': [388, 66, 225, 47],
        'trainNumber': [288, 53, 103, 41],
        'invoiceDate': [66, 114, 237, 43],
        'seatNum': [400, 115, 210, 46],
        'totalAmount': [66, 163, 188, 34],
        'ticketsNum': [21, 23, 218, 47],
    }
    excessTemplet = {
        'departCity': [26, 40, 151, 33],
        'arriveCity': [271, 40, 169, 33],
        'trainNumber': [178, 35, 92, 32],
        'invoiceDate': [12, 82, 203, 29],
        'seatNum': [315, 79, 136, 33],
        'totalAmount': [12, 118, 167, 28],
        'ticketsNum': [23, 12, 147, 37],
    }

    # VAT invoice template and its anchors; loaded for every ticket type.
    VATInvoiceTemplet = {}
    dic = xmlToDict.XmlTodict('VATInvoiceMuban.xml')
    tplt = [
        dic['QRCode'][0],
        dic['QRCode'][1],
        dic['figureX'][0] + dic['figureX'][2] / 2,
        dic['figureX'][1] + dic['figureX'][3] / 2,
    ]
    for c in tplt:
        if c is None:
            print('Templet VATInvoice error')

    ticket_kind = midProcessResult[1]
    if ticket_kind == 1:
        TemType = blueTemplet
    elif ticket_kind == 2:
        TemType = redTemplet
    elif ticket_kind == 3:
        TemType = excessTemplet
    elif ticket_kind == 11:
        # VAT special invoice: every template entry except the two anchors
        # becomes a field rectangle.
        for item in dic:
            if item != 'QRCode' and item != 'figureX':
                field = dic.get(item)
                VATInvoiceTemplet[item] = [
                    int(field[0]), int(field[1]),
                    int(field[2]), int(field[3]),
                ]
        TemType = VATInvoiceTemplet
    else:
        # Previously this fell through and failed later on an unbound
        # template variable; fail early with an explicit message instead.
        raise ValueError(
            'unsupported ticket type code: {!r}'.format(ticket_kind))

    # Detection scale: images whose height + width exceeds 1500 are halved;
    # red and excess tickets then override the rate with fixed values.
    rate = 1
    fcv = cv2.imread(filepath, 1)
    w1 = fcv.shape
    if w1[0] + w1[1] > 1500:
        rate = 0.5
        print("rate : 0.5")
    if ticket_kind == 2:
        rate = 2.0
        print("rate : 2.0")
    elif ticket_kind == 3:
        rate = 1.0
        print("rate : 1.0")

    box = Detect.detect(cv2.imread(midProcessResult[0]), rate)
    if ticket_kind == 11:
        # VAT invoice: anchor on the first two sorted boxes plus the symbol.
        # NOTE(review): a None result from findSymbol is not handled here.
        figureP = FindCircle.findSymbol(filepath)
        StBox = sortBox(box)
        Templet = adjustToTextLine(
            TemType, [StBox[0], StBox[1], figureP[0], figureP[1]],
            ticket_kind, tplt)
    else:
        # Train tickets: adjust the template directly against the boxes.
        Templet = adjustToTextLine(TemType, box, ticket_kind, None)

    # Debug display: greyscale image next to the template rectangles.
    # NOTE(review): assumed to run for every ticket type - confirm.
    im = cv2.imread(filepath, 0)
    rec = list(TemType.values())
    vis_textline0 = fp.util.visualize.rects(im, rec)
    pl.figure(figsize=(15, 10))
    pl.subplot(2, 2, 1)
    pl.imshow(im, 'gray')
    pl.subplot(2, 2, 2)
    pl.imshow(vis_textline0)
    pl.show()

    attributeLine = lineToAttribute.getAtbt.compute(midProcessResult[2], Templet)
    # OCR and word segmentation of the cropped fields.
    jsonResult = flow.cropToOcr(midProcessResult[0], attributeLine,
                                midProcessResult[1])
    print(jsonResult)
    return jsonResult
def mubanDetectInvoiceDate(filepath, setKey='invoiceDate'):
    """Locate one VAT-invoice template field and save a line-extraction plot.

    The field rectangle for ``setKey`` is read from the XML template,
    adjusted with ``simplyAdjust`` using the symbol position reported by
    ``FindCircle.findSymbol``, and matched against the text lines of the image
    via ``lineToAttribute.getAtbt.compute``. The matched rectangles are then
    drawn and the figure is saved to ``filepath`` with ``"upload"`` replaced
    by ``"line"``.

    Args:
        filepath: Path of the invoice image.
        setKey: Name of the template entry to locate.

    Returns:
        The value returned by ``lineToAttribute.getAtbt.compute``, or ``None``
        when the image cannot be read or ``FindCircle.findSymbol`` returns
        ``None``.
    """
    dic = xmlToDict.XmlTodict('/home/huangzheng/ocr/VATInvoiceSimpleMuban.xml')

    # Reference point derived from the figureX entry: value[0] + value[2] / 2
    # and value[1] + value[3] / 2 (presumably the centre of an x, y, w, h
    # rectangle - TODO confirm against the template format).
    figure_x = dic['figureX']
    tplt = [
        figure_x[0] + figure_x[2] / 2,
        figure_x[1] + figure_x[3] / 2,
    ]

    # Template restricted to the single requested field, values cast to int.
    field = dic.get(setKey)
    TemType = {setKey: [int(field[i]) for i in range(4)]}

    fcv = cv2.imread(filepath, 1)
    if fcv is None:
        # Previously a bare ``except`` printed this message and execution then
        # failed on the unbound ``w1``; bail out like the missing-symbol case.
        print("picture is None")
        return None
    w1 = fcv.shape

    figureP = FindCircle.findSymbol(filepath)
    if figureP is None:
        return None

    # VAT special invoice: shift the template using the detected symbol.
    Templet = simplyAdjust(TemType, [figureP[0], figureP[1]], tplt, w1)
    attributeLine = lineToAttribute.getAtbt.compute(textline(filepath), Templet)

    # Build the line-extraction picture from the matched rectangles.
    plt_rects = [attributeLine[x] for x in attributeLine]
    vis_textline0 = fp.util.visualize.rects(cv2.imread(filepath, 0), plt_rects)
    pl.imshow(vis_textline0)

    # Save into the "line" directory; a failure here is reported but must not
    # discard the recognition result.
    pltpath = filepath.replace("upload", "line")
    try:
        pl.savefig(pltpath)
    except Exception as e:
        print("绘制行提取图片不支持bmp格式:{}".format(e))
    return attributeLine
def surface(filename, type='blue'):
    """Extract the field text lines of an uploaded ticket / invoice image.

    The file ``allstatic/<filename>`` is classified (or preprocessed for the
    given ``type``), the matching field template is selected and adjusted to
    the detected text boxes, the text lines are mapped to fields, and a plot
    of the matched rectangles is saved next to the processed image (path with
    ``"out"`` replaced by ``"line"``).

    The intermediate result ``midProcessResult`` is indexed as
    ``[0]`` processed image path, ``[1]`` ticket type code (1 blue train
    ticket, 2 red train ticket, 3 excess-fare ticket, 11 VAT special invoice)
    and ``[2]`` text lines.

    Args:
        filename: Image file name relative to the ``allstatic`` directory.
        type: ``None`` to classify with ``detectType.detectType``; ``'blue'``
            or ``'excess'`` to preprocess with ``PipeInvoice.getPipe``; any
            other value is handled as a red ticket. (The name shadows the
            builtin but is kept for callers.)

    Returns:
        A tuple ``(processed image path, ticket type code, attribute lines)``.

    Raises:
        ValueError: If the ticket type code is not one of 1, 2, 3 or 11.
    """
    filepath = os.path.join('allstatic', filename)

    if type is None:
        # Unclassified input: preprocessing decides the ticket type.
        midProcessResult = detectType.detectType('allstatic', filename)
    elif type == 'blue' or type == 'excess':
        # Per the original author's note, getPipe returns
        # (out_filename, 1, pipe.textlines, cdic).
        midProcessResult = PipeInvoice.getPipe('allstatic', filename, type, False)
    else:
        # Any other value is handled as type == 'red': the upload is copied
        # unchanged into the "out" location and used as the processed image.
        out_filename = os.path.join('allstatic',
                                    filename.replace('upload', 'out'))
        shutil.copy(filepath, out_filename)
        # For now the original image is used, not a rectified one.
        # NOTE(review): the text-line extraction is assumed to belong to this
        # branch only - confirm against the original layout.
        midProcessResult = [out_filename, 2, textline(out_filename)]

    # Field templates, four integers per field (presumably x, y, width,
    # height - TODO confirm against adjustToTextLine).
    blueTemplet = {
        'departCity': [48, 62, 222, 56],
        'arriveCity': [412, 61, 228, 55],
        'trainNumber': [264, 62, 170, 57],
        'invoiceDate': [24, 139, 369, 42],
        'seatNum': [408, 138, 160, 40],
        'idNum': [22, 276, 306, 38],
        'passenger': [328, 276, 150, 38],
        'totalAmount': [33, 177, 151, 39],
        'ticketsNum': [21, 10, 195, 66],
    }
    redTemplet = {
        'idNum': [66, 242, 357, 38],
        'departCity': [66, 66, 222, 45],
        'arriveCity': [388, 66, 225, 47],
        'trainNumber': [288, 53, 103, 41],
        'invoiceDate': [66, 114, 237, 43],
        'seatNum': [400, 115, 210, 46],
        'totalAmount': [66, 163, 188, 34],
        'ticketsNum': [21, 23, 218, 47],
    }
    excessTemplet = {
        'departCity': [26, 40, 151, 33],
        'arriveCity': [271, 40, 169, 33],
        'trainNumber': [178, 35, 92, 32],
        'invoiceDate': [12, 82, 203, 29],
        'seatNum': [315, 79, 136, 33],
        'totalAmount': [12, 118, 167, 28],
        'ticketsNum': [23, 12, 147, 37],
    }
    VATInvoiceTemplet = {}

    ticket_kind = midProcessResult[1]
    if ticket_kind == 1:
        TemType = blueTemplet
    elif ticket_kind == 2:
        TemType = redTemplet
    elif ticket_kind == 3:
        TemType = excessTemplet
    elif ticket_kind == 11:
        # VAT special invoice: load the XML template and its anchors.
        dic = xmlToDict.XmlTodict('VATInvoiceMuban.xml')
        tplt = [
            dic['QRCode'][0],
            dic['QRCode'][1],
            dic['figureX'][0] + dic['figureX'][2] / 2,
            dic['figureX'][1] + dic['figureX'][3] / 2,
        ]
        for c in tplt:
            if c is None:
                print('Templet VATInvoice error')
        # Every entry except the two anchors becomes a field rectangle.
        for item in dic:
            if item != 'QRCode' and item != 'figureX':
                field = dic.get(item)
                VATInvoiceTemplet[item] = [
                    int(field[0]), int(field[1]),
                    int(field[2]), int(field[3]),
                ]
        TemType = VATInvoiceTemplet
    else:
        # Previously this fell through and failed later on an unbound
        # template variable; fail early with an explicit message instead.
        raise ValueError(
            'unsupported ticket type code: {!r}'.format(ticket_kind))

    # Detection scale: images whose height + width exceeds 1500 are halved;
    # red and excess tickets then override the rate with fixed values.
    rate = 1
    fcv = cv2.imread(filepath, 1)
    w1 = fcv.shape
    if w1[0] + w1[1] > 1500:
        rate = 0.5
    if ticket_kind == 2:
        rate = 2.0
        print("rate : 2.0")
    elif ticket_kind == 3:
        rate = 1.0
        print("rate : 1.0")

    box = Detect.detect(cv2.imread(midProcessResult[0]), rate)
    if ticket_kind == 11:
        # VAT invoice: anchor on the first two sorted boxes plus the symbol.
        # NOTE(review): a None result from findSymbol is not handled here.
        figureP = FindCircle.findSymbol(filepath)
        StBox = sortBox(box)
        Templet = adjustToTextLine(
            TemType, [StBox[0], StBox[1], figureP[0], figureP[1]],
            ticket_kind, tplt)
    else:
        # Train tickets: adjust the template directly against the boxes.
        Templet = adjustToTextLine(TemType, box, ticket_kind, None)

    attributeLine = lineToAttribute.getAtbt.compute(midProcessResult[2], Templet)

    # Build the line-extraction picture from the matched rectangles.
    plt_rects = [attributeLine[x] for x in attributeLine]
    vis_textline0 = fp.util.visualize.rects(
        cv2.imread(midProcessResult[0], 0), plt_rects)
    pl.imshow(vis_textline0)

    # Save into the "line" directory; a failure here is reported but must not
    # discard the recognition result.
    pltpath = midProcessResult[0].replace("out", "line")
    try:
        pl.savefig(pltpath)
    except Exception as e:
        print("绘制行提取图片不支持bmp格式:{}".format(e))
    return midProcessResult[0], midProcessResult[1], attributeLine