def build_model(config):
    """Construct an OCR model together with its vocabulary.

    Args:
        config: Dict with at least 'vocab' (alphabet spec for Vocab) and
            'device' (torch device string/object) keys.

    Returns:
        (model, vocab) tuple, with the model already moved to the device.
    """
    vocabulary = Vocab(config['vocab'])
    target_device = config['device']
    ocr_model = OCR(len(vocabulary), config).to(target_device)
    return ocr_model, vocabulary
def __init__(self, initialize_whatsapp=True, session=None):
    """Start a Chrome WebDriver and optionally open WhatsApp Web.

    Args:
        initialize_whatsapp: When True, navigate to WhatsApp Web and wait
            for the chat UI to load before continuing.
        session: Optional Chrome user-data directory so a previously
            authenticated WhatsApp session can be reused.

    Fix: the original used a bare ``except:``, which also swallows
    KeyboardInterrupt/SystemExit; narrowed to ``except Exception``.
    """
    chrome_options = Options()
    if session:
        chrome_options.add_argument("--user-data-dir={}".format(session))
        try:
            self.browser = webdriver.Chrome(options=chrome_options)
        except Exception:
            # if previous session is left open, close it — Chrome refuses
            # to start when another process already holds the profile.
            if getWindowsWithTitle('WhatsApp - Google Chrome'):
                getWindowsWithTitle('WhatsApp - Google Chrome')[0].close()
                info("Session is already open. \"WhatsApp - Google Chrome\" is closing...")
            if getWindowsWithTitle('New Tab - Google Chrome'):
                getWindowsWithTitle('New Tab - Google Chrome')[0].close()
                info("Session is already open. \"New Tab - Google Chrome\" is closing...")
            self.browser = webdriver.Chrome(options=chrome_options)
    else:
        self.browser = webdriver.Chrome()
    info("Chrome Driver is initialized successfully.")
    if initialize_whatsapp:
        self.browser.get("https://web.whatsapp.com/")
        info("WhatsApp Web Client is opening...")
        # Wait up to 30s for the message box, i.e. until the user is logged in.
        self.find_wait("copyable-text.selectable-text", By.CLASS_NAME, timeout=30)
        self.browser.maximize_window()
        # NOTE(review): original indentation was lost; the OCR/Splunk/Bot
        # wiring below is assumed to belong to the initialize_whatsapp
        # branch — confirm against the original file.
        self.OCR = OCR()
        # Local imports presumably avoid a circular import at module load.
        from splunk import Splunk
        self.splunk = Splunk()
        from bot import Bot
        self.bot = Bot(self.splunk)
def onLeftButtonUp(event):
    """Finish the rubber-band screenshot selection.

    Grabs the selected screen region, saves it to a fixed path, closes
    the selection overlay, runs OCR on the capture and pushes the
    recognized text into the main Tk window. (Closure: uses ``self``,
    ``tk_obj`` and ``lastDraw`` from the enclosing scope.)
    """
    self.sel = False
    try:
        self.canvas.delete(lastDraw)
    except Exception as e:
        # The selection rectangle may not exist yet (plain click); ignore.
        pass
    sleep(0.1)
    # Handle selections dragged from the bottom-right toward the top-left:
    # sort press/release coordinates so (left, top) <= (right, bottom).
    left, right = sorted([self.X.get(), event.x])
    top, bottom = sorted([self.Y.get(), event.y])
    pic = ImageGrab.grab((left + 1, top + 1, right, bottom))
    # Destination of the captured image (fixed path; no save dialog is shown).
    fileName = "/home/robotsl/workspace/ocr_enhancement/OCR/images/for_ocr.png"
    # Close the current (selection overlay) window.
    self.top.destroy()
    pic.save(fileName)
    tk_obj.init_window.state('normal')
    # Run OCR; `log` presumably holds per-character logits — TODO confirm.
    result, log = OCR.OCR_OR_LOGMAX("log")
    #sorted,indes = torch.sort(log,-1)
    print(log[:, :10].shape)
    #tk_obj.set_result(result,"before_enhance")
    tk_obj.init_data_Text.delete('1.0', 'end')
    #tk_obj.init_data_Text.insert("end","test")
    tk_obj.init_data_Text.insert("end", result)
    tk_obj.result_data_Text.delete('1.0', 'end')
def main(args):
    """Run table OCR over every page of a PDF.

    Converts the PDF at args['pdf_file'] to page images, detects table
    cells on each (bias-corrected) page, and prints the recognized text
    of every cell.

    Improvements: iterate tables/cells directly instead of indexing with
    ``range(len(...))``; dropped unused loop indices and the redundant
    ``end='\\n'`` (print's default).
    """
    pdf_file = args['pdf_file']
    pdf_images = pdf2img(pdf_file)
    images = pdf_images
    # images = select_imgs_with_form(pdf_images)
    ocr = OCR()
    for image in images:
        image = correct_img_bias(image)
        fr = formRecognition(image)
        tables = fr.run()
        # Recognize each cell of every detected table.
        for table in tables:
            for cell in table:
                cell_img = ocr.img_enhancement(cell)
                res = ocr.text_recognition(cell_img)
                print(res)
def parse_request():
    """Handle an OCR-and-translate request.

    Expects a POSTed JSON body with a 'url' key; OCRs the image at that
    URL, translates the extracted text, and returns it as JSON. Non-POST
    requests fall through (returning None), as before.
    """
    if request.method != 'POST':
        return
    print("receiving")
    image_url = request.json.get('url')
    extracted_text = OCR(image_url)
    print(extracted_text)
    return jsonify(translate_text(extracted_text))
def __init__(self, map_image, map_path):
    """Wrap a rendered map page and OCR its corner coordinates.

    Args:
        map_image: Image object holding the rendered map page.
        map_path: Filesystem path the image came from.
    """
    super(Map, self).__init__()
    self._map_image = map_image
    self._map_path = map_path
    self._ocr = OCR()
    # Seed from the class-level default margins.
    self._margin_left = self.MARGIN_LEFT
    self._margin_top = self.MARGIN_TOP
    self._margin_right = self.MARGIN_RIGHT
    self._margin_bottom = self.MARGIN_BOTTOM
    # Geographic values are unknown until OCR'ed from the image.
    self._x = self._y = OCR.INVALID_COORDINATE
    self._width = self._height = OCR.INVALID_COORDINATE
    self._RefreshCoordinates()
def upload():
    """Accept an image upload, preprocess it, and render extracted text.

    Saves the raw upload under static/images/, writes a "_processed"
    variant next to it via OCR(), runs Tesseract (Polish) on the
    processed image, and renders processed.html with image and text.

    Fixes: os.path.splitext handles filenames with multiple dots
    (``scan.v2.png``) which the old ``split(".")[0]/[1]`` mangled;
    makedirs(exist_ok=True) removes the isdir/mkdir race.
    """
    target_path = os.path.join(APP_ROOT, 'static/images/')
    os.makedirs(target_path, exist_ok=True)
    # NOTE(review): file.filename is untrusted user input; consider
    # werkzeug.utils.secure_filename to prevent path traversal.
    file = request.files.getlist("file")[0]
    filename = file.filename
    final_path = os.path.join(target_path, filename)
    file.save(final_path)
    imagepath = "./static/images/" + filename
    # splitext keeps the extension (dot included) intact.
    base, ext = os.path.splitext(filename)
    save_to_file = "./static/images/" + base + "_processed" + ext
    OCR(imagepath, save_to_file)
    # converting the image to text
    text = pytesseract.image_to_string(Image.open(save_to_file), lang='pol')
    # loading the second page
    return render_template("processed.html", image_unprocessed=imagepath, display=text)
def __init__(self):
    """Initialize by delegating directly to the OCR base class.

    NOTE(review): calls OCR.__init__ explicitly rather than super();
    equivalent for single inheritance but skips cooperative MRO chains.
    """
    OCR.__init__(self)
# Capture frames from the 'Zoom Meeting' window, cropped to the content
# region and scaled to 2000x1000.
inputmanager = InputManager(
    outDim=(2000, 1000),  # wxh
    inputtype='test',
    winKeyword='Zoom Meeting',
    crop=(325, 62, 325 + 1280, 62 + 960),  # x1,y1,x2,y2 (TL,BR)
)
# initialize pre process manager thread
analyzer = AnalyzerThread(
    inputmanager,
    transIngressQueue,
    frameAverage=20,
)
# initialize post process modules
ocr = OCR()
parser = Parser()
# initialize post process manager thread; consumes frames that the
# analyzer pushes onto transIngressQueue.
translator = TranslatorThread(ocr, parser, transIngressQueue)
# start threads
analyzer.start()
translator.start()
# Keep the main thread alive; Ctrl-C stops both worker threads.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    analyzer.kill()
    translator.kill()
def get_result(self):
    """Recognize every character cell and return them as one string.

    Converts each cell index to its numeric grid, matches it against the
    trained glyphs, and concatenates the per-cell results in order.
    """
    recognized = []
    for cell_index in range(self.chars_len):
        grid = self.to_numberic_grid(cell_index)
        recognized.append(OCR(grid).match_char())
    return "".join(recognized)
def train(self, chars):
    """Teach the OCR one glyph per character cell.

    Pairs each character of *chars* with the numeric grid of the cell at
    the same index and registers it as a training sample.
    """
    for cell_index, expected_char in enumerate(chars):
        grid = self.to_numberic_grid(cell_index)
        OCR(grid).train_char(expected_char)
def main():
    """Read key regions from each video frame, OCR their values, and
    print a live table of the collected game information.
    """
    # Colored banner (CP_* are ANSI color codes).
    print('{}{:=<50}{}'.format(CP_Y, '', CP_C))
    print('{}**{}{:^46}{}**{}'.
          format(CP_Y, CP_R, 'Game Informantion Collector', CP_Y, CP_C))
    print('{}**{}{:^46}{}**{}'.
          format(CP_Y, CP_R, 'By: Abhishek Chaurasia', CP_Y, CP_C))
    print('{}{:=<50}{}'.format(CP_Y, '', CP_C))
    # Grab frames from screen or video
    # Replace it with any other frame grabber
    frame_grabber = VideoReader(args.video_path)
    # Initialization
    ocr = OCR(args.model, args.debug)
    items = {}
    n_items = 0
    keyvalues = open(args.key_info, 'r')
    # Ignore first two lines
    keyvalues.readline()
    keyvalues.readline()
    # Remaining lines hold "keyword tx ty bx by" with values at the even
    # token offsets (0, 2, 4, 6, 8) — presumably labels sit in between.
    for line in keyvalues:
        item = line.split()
        # parsed info: keyword | tx | ty | bx | by
        items[n_items] = (item[0], item[2], item[4], item[6], item[8])
        n_items += 1
    ########################################
    # Ignore this section:
    # Important only when you care about printed values
    print('{:=<50}'.format(''))
    pad = (50//n_items) - 2
    for n_item in items:
        print_val(items[n_item][0], pad, n_item, len(items))
    print('\n{:-<50}'.format(''))
    ########################################
    # Get next frame
    while frame_grabber.next_frame():
        current_frame = frame_grabber.frame
        # Crop section of the frame containing value you are interested in
        for n_item in items:
            tx = int(items[n_item][1])
            ty = int(items[n_item][2])
            bx = int(items[n_item][3])
            by = int(items[n_item][4])
            key_part = current_frame[ty:by, tx:bx, :]
            # send the cropped area and get its value
            value = ocr.forward(key_part)
            # Create box around idividual ROIs
            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(current_frame, str(value), (tx-10,ty-10), font, 1, (255,255,255), 1)
            # cv2.rectangle(current_frame, (tx, ty), (bx, by), (0, 255, 0), 1)
            print_val(value, pad, n_item, len(items))
        print("")
        # NOTE(review): the display window is created only when NOT in
        # debug mode, yet waitKey (which lets the window refresh) runs
        # only IN debug mode — the conditions look inverted; confirm
        # against the original intent.
        if args.debug:
            pass
        else:
            cv2.startWindowThread()
            cv2.namedWindow("Video")
            cv2.imshow('Video', current_frame)
        if args.debug:
            cv2.waitKey(1)
def newOcr(filepath, model):
    """OCR the image at *filepath* using a preloaded model.

    Fix: the original discarded ocr.OCR's result, so callers doing
    ``midResult = newOcr(...)`` (see cropToOcr) always received None.
    Return the recognition result instead.
    """
    return ocr.OCR(filepath, base_model=model)
def cropToOcr(filePath, recT, typeT, debug=False, isusebaidu=False):
    """Crop each region in *recT* from the invoice image, OCR the crops,
    post-correct the results, and return them as a JSON string.

    Args:
        filePath: Path of the source invoice image.
        recT: Dict mapping field name -> (x, y, width, height) region.
        typeT: Invoice type; 11 selects the VAT-invoice pipeline.
        debug: When True, only crops are written — no OCR is run.
        isusebaidu: When True use the Baidu OCR service (OcrPic),
            otherwise the local model via newOcr().

    Returns:
        JSON string (unicode-unescaped) of the corrected field values.

    Fix: the original ``print(midResult + ' isUseBaidu: ' + isusebaidu)``
    concatenated str + bool and raised TypeError whenever it executed;
    formatting now stringifies both values.
    """
    ocrResult = {}
    img = Image.open(filePath)
    # Hoist the repeated path decomposition: [0] = directory, [1] = basename.
    path_parts = jwkj_get_filePath_fileName_fileExt(filePath)
    tmp_dir = path_parts[0] + "tmp/" + path_parts[1]
    if not os.path.exists(tmp_dir):
        os.mkdir(tmp_dir)
    # Load the self-trained OCR model (VAT special-invoice model);
    # it can be selected by setting typeT to 11.
    model = ocr.load_model()
    for x in recT:
        # An all-zero rectangle means the field was not located; skip it
        # (checked before cropping so no useless crop is produced).
        if recT[x][0] == 0 and recT[x][1] == 0 and recT[x][2] == 0 and recT[x][3] == 0:
            print("↑--------↑--------↑--------↑ recT : " + x + " is error↑--------↑--------↑")
            continue
        sp = img.crop((recT[x][0], recT[x][1],
                       recT[x][0] + recT[x][2], recT[x][1] + recT[x][3]))
        sFPN = tmp_dir + "/" + path_parts[1] + "_" + x + ".jpg"
        sp.save(sFPN)
        if not debug:
            if isusebaidu:
                midResult = OcrPic(sFPN)
            else:
                midResult = newOcr(sFPN, model)
            print('{} isUseBaidu: {}'.format(midResult, isusebaidu))
            ocrResult[x] = midResult
    print(ocrResult)
    pC = SemanticCorrect.posteriorCrt.posteriorCrt()
    if typeT == 11 and not debug:
        import OcrForVat
        # Re-OCR the invoice date with the template detector when the
        # first pass clearly failed (still contains the field label,
        # or the text is implausibly short).
        if ocrResult['invoiceDate'][:4] == '开票日期' or len(ocrResult['invoiceDate']) < 4:
            recT['invoiceDate'] = OcrForVat.mubanDetectInvoiceDate(filePath)['invoiceDate']
            sp = img.crop((recT['invoiceDate'][0], recT['invoiceDate'][1],
                           recT['invoiceDate'][0] + recT['invoiceDate'][2],
                           recT['invoiceDate'][1] + recT['invoiceDate'][3]))
            sFPN = tmp_dir + "/" + path_parts[1] + "_" + 'invoiceDateFix' + ".jpg"
            sp.save(sFPN)
            midResult = OcrPic(sFPN)
            print('invoiceDateFix: ' + midResult)
            ocrResult['invoiceDate'] = midResult
    js = InterfaceType.JsonInterface.invoice()
    if typeT == 11:
        pC.setVATParaFromVATDict(ocrResult)
        pC.startVATCrt()
        js.setValueWithDict(pC.VATdic)
        jsoni = js.dic
    else:
        pC.setTrainTicketParaFromDict(ocrResult)
        pC.startTrainTicketCrt()
        js.setValueWithDict(pC.dic)
        jsoni = js.dic
    return json.dumps(jsoni).encode().decode("unicode-escape")
class Map(Rectangle):
    """A rendered map page whose geographic coordinates are OCR'ed.

    Wraps the page image plus its source path, tracks the page margins,
    and lazily OCRs the decimal-degree labels printed just outside the
    map frame to recover the map's x/y origin and width/height (in
    degrees). All geometry helpers work in page-pixel space.
    """

    def __init__(self, map_image, map_path):
        """Store the page image/path and eagerly OCR the coordinates."""
        super(Map, self).__init__()
        self._map_image = map_image
        self._map_path = map_path
        self._ocr = OCR()
        # Seed from the class-level default margins; _RefreshCoordinates
        # may re-detect them for corrupted PDFs.
        self._margin_left = self.MARGIN_LEFT
        self._margin_top = self.MARGIN_TOP
        self._margin_right = self.MARGIN_RIGHT
        self._margin_bottom = self.MARGIN_BOTTOM
        # Geographic values are unknown until OCR'ed.
        self._x = OCR.INVALID_COORDINATE
        self._y = OCR.INVALID_COORDINATE
        self._width = OCR.INVALID_COORDINATE
        self._height = OCR.INVALID_COORDINATE
        self._RefreshCoordinates()

    def IsValid(self):
        """Return True when the OCR'ed geometry looks self-consistent."""
        if not self._map_image:
            return False
        if (self._x == OCR.INVALID_COORDINATE or
                self._y == OCR.INVALID_COORDINATE or
                self._width == OCR.INVALID_COORDINATE or
                self._height == OCR.INVALID_COORDINATE):
            return False
        # Plausible extent bounds in degrees — presumably tuned to this
        # map series; TODO confirm the expected sheet size.
        if self._width < 0.001 or self._width > 0.05:
            return False
        if self._height < 0.001 or self._height > 0.05:
            return False
        _, _, width, height = self._GetMapGeometry()
        height_pixel_ratio = self.GetHeight() / height
        width_pixel_ratio = self.GetWidth() / width
        # The ratio should not be very different from each other, otherwise
        # we OCR'ed one of the coordinates wrong.
        # NOTE(review): this computes abs(a) - abs(b), which is negative
        # (and thus passes) whenever b > a — likely meant abs(a - b).
        if (abs(height_pixel_ratio) - abs(width_pixel_ratio)) > 0.0001:
            return False
        return True

    def Dispose(self):
        """Release the page image and the OCR engine."""
        self._map_image.close()
        self._map_image = None
        self._ocr = None

    def GetPath(self):
        """Return the filesystem path this map was loaded from."""
        return self._map_path

    def GetMapImage(self):
        """Return just the map area as an image, or None if invalid."""
        if not self.IsValid():
            return None
        # The reality here is we store a bit more than just the
        # map image, but the whole contents of the PDF including
        # the coordinates, logos, etc. So when the map is requested,
        # we need to slice it from this image.
        return self._CropGeometry(*self._GetMapGeometry())

    def GetX(self):
        """OCR (once) and return the map's left coordinate in degrees."""
        if self._x != OCR.INVALID_COORDINATE:
            return self._x
        x, y, map_width, map_height = self._GetMapGeometry()
        offset = _BBOX_OFFSET_
        width = _BBOX_WIDTH_
        height = _BBOX_HEIGHT_
        y_offset = y + map_height
        # Getting the X coordinate from the bottom left corner because
        # it turns out that the upper left is often corrupted.
        image = self._CropGeometry(x, y_offset + offset, width, height)
        self._x = self._ocr.GetDecimalDegrees(image)
        return self._x

    def GetY(self):
        """OCR (once) and return the map's top coordinate in degrees."""
        if self._y != OCR.INVALID_COORDINATE:
            return self._y
        x, y, _, _ = self._GetMapGeometry()
        offset = _BBOX_OFFSET_
        # Width/height swapped: the label on the left edge is vertical.
        width = _BBOX_HEIGHT_
        height = _BBOX_WIDTH_
        image = self._CropGeometry(x - offset - width, y, width, height)
        # Rotate so the vertical label reads horizontally — assumes
        # rotate() mutates in place (wand-style); TODO confirm.
        image.rotate(90)
        self._y = self._ocr.GetDecimalDegrees(image)
        return self._y

    def GetWidth(self):
        """Return the map's width in degrees (X2 - X), OCR'ing if needed."""
        if self._width != OCR.INVALID_COORDINATE:
            return self._width
        self._width = self._GetX2()
        if self._width != OCR.INVALID_COORDINATE:
            self._width -= self.GetX()
        return self._width

    def GetHeight(self):
        """Return the map's height in degrees (|Y2 - Y|), OCR'ing if needed."""
        if self._height != OCR.INVALID_COORDINATE:
            return self._height
        self._height = self._GetY2()
        if self._height != OCR.INVALID_COORDINATE:
            self._height = abs(self._height - self.GetY())
        return self._height

    def SaveMapImageAsPNG(self, basename):
        """Write a half-size PNG of the map area to `<basename>.png`."""
        image = self.GetMapImage()
        # NOTE(review): true division yields floats here; resize may
        # require ints on Python 3 — verify against the imaging library.
        image.resize(image.width / 2, image.height / 2)
        image.save(filename="%s.png" % basename)

    def SaveMapImageAsTIFF(self, basename):
        """Write an LZW-compressed TIFF of the map area to `<basename>.tif`."""
        image = self.GetMapImage()
        # If we don't add the alpha channel, the tiles will get an empty
        # black background.
        image.alpha_channel = True
        # Set compression to LZW, using the low level API here because
        # this function is not supported by the high level bindings yet.
        library.MagickSetCompression(image.wand, 11)
        # TODO: Ideally we should save as a GeoTIFF with the coordinates
        # tags. That would spare us one unnecessary step on the pipeline for
        # generating tiles.
        image.save(filename="%s.tif" % basename)

    def _GetX2(self):
        """OCR the right-edge coordinate label (bottom-right corner)."""
        x, y, map_width, map_height = self._GetMapGeometry()
        offset = _BBOX_OFFSET_
        width = _BBOX_WIDTH_
        height = _BBOX_HEIGHT_
        x_offset = x + map_width
        y_offset = y + map_height
        image = self._CropGeometry(x_offset - width, y_offset + offset,
                                   width, height)
        return self._ocr.GetDecimalDegrees(image)

    def _GetY2(self):
        """OCR the bottom-edge coordinate label (right side, vertical)."""
        x, y, map_width, map_height = self._GetMapGeometry()
        offset = _BBOX_OFFSET_
        width = _BBOX_HEIGHT_
        height = _BBOX_WIDTH_
        x_offset = x + map_width
        y_offset = y + map_height
        image = self._CropGeometry(x_offset + offset, y_offset - height,
                                   width, height)
        image.rotate(90)
        return self._ocr.GetDecimalDegrees(image)

    def _RefreshCoordinates(self):
        """Populate x/y/width/height, re-detecting margins if needed."""
        self.GetWidth()
        self.GetHeight()
        if self.IsValid():
            return
        # We have some corrupted PDFs with the top margin
        # misplaced compared to the "sane" PDFs. We try
        # to detect the margin and ultimately, interpolate
        # the upper coordinate using degrees per pixel.
        left, top, right, bottom = _GetMargins(self._map_image)
        self._margin_left = left
        self._margin_top = top
        self._margin_right = right
        self._margin_bottom = bottom
        # Margin detection can really screw up things. I'm being
        # a bit lazy here and not checking for the scenarios that
        # can go wrong. Pokemon exception handling FTW.
        try:
            self.GetWidth()
            self.GetHeight()
        except:
            return
        if self.IsValid():
            return
        y2 = self._GetY2()
        if y2 == OCR.INVALID_COORDINATE:
            return
        # Interpolate the height from the width's degrees-per-pixel
        # ratio, then derive the top coordinate from the bottom one.
        pixel_width = self.WIDTH - self._margin_left - self._margin_right
        pixel_height = self.HEIGHT - self._margin_top - self._margin_bottom
        self._height = self._width / float(pixel_width) * pixel_height
        self._y = y2 + self._height

    def _CropGeometry(self, x1, y1, width, height):
        """Slice a (x1, y1, width, height) region out of the page image."""
        x2 = x1 + width
        y2 = y1 + height
        return self._map_image[x1:x2, y1:y2]

    def _GetMapGeometry(self):
        """Return (x, y, width, height) of the map area inside the page."""
        width = self.WIDTH - self._margin_left - self._margin_right
        height = self.HEIGHT - self._margin_top - self._margin_bottom
        return self._margin_left, self._margin_top, width, height