def pdf2text(pdf_filename):
    """Rasterize a PDF at 300 DPI and OCR it, returning one string per page."""
    ocr_tool = pyocr.get_available_tools()[0]
    ocr_lang = ocr_tool.get_available_languages()[1]
    # Render the PDF pages and re-encode each one as a standalone JPEG blob.
    pdf_image = Image(filename=pdf_filename, resolution=300)
    jpeg_pages = pdf_image.convert('jpeg')
    page_blobs = [Image(image=page).make_blob('jpeg')
                  for page in jpeg_pages.sequence]
    # OCR every page blob independently.
    page_texts = []
    for blob in page_blobs:
        page_texts.append(ocr_tool.image_to_string(
            PI.open(io.BytesIO(blob)),
            lang=ocr_lang,
            builder=pyocr.builders.TextBuilder()))
    return page_texts
def image_to_string(self, filename):
    """OCR *filename* with every available backend and return all results.

    Tesseract processes only the raw image; any other backend (e.g.
    Cuneiform) additionally processes a grayscale copy and an adaptively
    thresholded copy, since those tools benefit from pre-processing.
    The generated temp files are removed before returning.

    :raises PyOCRIntegrationNoOCRFound: when no OCR backend is installed
    """
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        raise PyOCRIntegrationNoOCRFound('No OCR tool has been found on '
                                         'this system. Make sure it\'s on'
                                         'PATH variable of your system')
    filename_split, fileextension_split = os.path.splitext(filename)
    # Grayscale variant, written next to the original file.
    grayscale_filename = filename_split + '_gray' + fileextension_split
    with WandImage(filename=filename) as img:
        img.type = 'grayscale'
        img.save(filename=grayscale_filename)
    # Adaptive-threshold variant (OpenCV helper).
    adaptive_thresh_filename = filename_split + '_adt' + fileextension_split
    OpenCVIntegration.adaptive_threshold(filename, adaptive_thresh_filename)
    processes = []
    for tool in tools:
        if tool.get_name() == "Tesseract":
            sources = [filename]
        else:
            sources = [filename, grayscale_filename, adaptive_thresh_filename]
        for source in sources:
            thread = self._OCRProcessingThread(tool, self.lang, source)
            thread.start()
            processes.append(thread)
    # Block until every worker finishes. The original busy-polled
    # is_alive() in a tight loop, burning CPU for no benefit; join()
    # waits on each thread properly.
    for p in processes:
        p.join()
    result = [p.return_value for p in processes]
    # Removing generated files
    self._cleanup(grayscale_filename)
    self._cleanup(adaptive_thresh_filename)
    return result
def testPdf(self, pdfFile, border):
    """Render *pdfFile*, OCR it in two vertical halves split at column
    *border*, and print TRUE/FALSE depending on whether a "case submitted"
    marker appears in the combined text.

    NOTE: Python 2 code (ur'' literals).
    """
    # End-of-transcript markers; the third pattern is the most relaxed.
    reCaseSubmitted = re.compile(ur'The case is submitted\.\s*\(Whereupon,', re.UNICODE)
    reCaseSubmitted2 = re.compile(ur'Case is submitted\.\s*\(Whereupon,', re.UNICODE)
    reCaseSubmitted3 = re.compile(ur'\(?Whereupon, at \w\w:\w\w', re.UNICODE)  # the most-relaxed
    tool = pyocr.get_available_tools()[0]
    lang = tool.get_available_languages()[1]
    req_image = []
    final_text = []
    # this could've been done better, but for now will do
    idx = 0
    with Image(filename = pdfFile, resolution=self.__dpi) as img:
        # if self.__debug:
        #     print "Parsing Page: " + str(idx + 1)
        # NOTE(review): plain assignment -- cloneImg merely aliases img, it
        # is NOT a copy; the threshold below mutates img itself. Confirm
        # this is intended.
        cloneImg =img
        cloneImg.alpha_channel = False
        self.evaluate( cloneImg, 'threshold', self.__threshold)
        # OCR the left part (columns [0, border)) and the right part
        # (columns [border, end)) separately.
        txt1 = tool.image_to_string(
            PI.open(io.BytesIO(cloneImg[:, :border].make_blob('png'))),
            lang=lang,
            builder=pyocr.builders.TextBuilder())+ "\n"
        txt2 = tool.image_to_string(
            PI.open(io.BytesIO(cloneImg[:, border:].make_blob('png'))),
            lang=lang,
            builder=pyocr.builders.TextBuilder())+ "\n"
        txt = txt1+txt2
        test = self._clean_text(txt1)
        print(txt1)
        print(txt2)
        if reCaseSubmitted.search(txt) != None or reCaseSubmitted2.search(txt) != None or reCaseSubmitted3.search(txt) != None:
            print( 'TRUE' )
        else:
            print('FALSE')
def test(image_name):
    """Recognize a captcha; only works when the code sits centered in
    the image.

    :param image_name: path to the captcha image
    :return: None (results are printed)
    """
    with Image.open(image_name) as image:
        # Convert the colour image to grayscale (the L component).
        image = image.convert("L")
        # Strip the noise; the image is simple enough that thresholding
        # (done inside the helper) is sufficient.
        image = cut_noise(image)
        available = pyocr.get_available_tools()
        if not available:
            print("No OCR tool found")
            sys.exit(1)
        ocr_tool = available[0]
        image.save("test.jpg")
        txt = ocr_tool.image_to_string(
            image, lang="eng", builder=pyocr.builders.TextBuilder())
        # Digits - Only Tesseract
        digits = ocr_tool.image_to_string(
            image, lang="eng", builder=pyocr.tesseract.DigitBuilder())
        print(txt)
        print(digits)
def readFile(self, pdfFile, mode = 'top-right' ): mode = 0 if mode == 'top-right' else 1 self.__img_crop = self.__img_crop_modes[mode] tool = pyocr.get_available_tools()[0] lang = tool.get_available_languages()[1] # this could've been done better, but for now will do reCaseSubmitted = re.compile(ur'The case is submitted\.\s*\(Whereupon,', re.UNICODE) reCaseSubmitted2 = re.compile(ur'Case is submitted\.\s*\(Whereupon,', re.UNICODE) reCaseSubmitted3 = re.compile(ur'\(?Whereupon, at \w\w:\w\w', re.UNICODE) # the most-relaxed # get file name helper = FileHelper() outfp = open(self.__outputDir + helper.GetFileName(pdfFile) + ".plain", 'w') with Image(filename=pdfFile, resolution=self.__dpi) as image_pdf: image_pngs = image_pdf.convert('png') idx = 0 output_text = '' for img in image_pngs.sequence: if self.__debug: print "Parsing Page: " + str(idx + 1) cloneImg = img[self.__img_crop[0] : self.__img_crop[2], self.__img_crop[1] : self.__img_crop[3] ] cloneImg.alpha_channel = False # cloneImg.save(filename = './img_{}.png'.format(idx)) self.evaluate( cloneImg, 'threshold', self.__threshold) txt = tool.image_to_string( PI.open(io.BytesIO(cloneImg.make_blob('png'))), lang=lang, builder=pyocr.builders.TextBuilder())+ "\n" output_text = output_text + self._clean_text(txt) if reCaseSubmitted.search(txt) != None or reCaseSubmitted2.search(txt) != None or reCaseSubmitted3.search(txt) != None: break idx += 1 outfp.write(output_text)
def pdf2ocr(pdffile):
    """
    Optical Character Recognition on PDF files using Python
    see https://pythontips.com/2016/02/25/ocr-on-pdf-files-using-python/
    :param pdffile: pdffile to be OCR'd
    :return: list with one recognized-text string per PDF page
    """
    # NOTE: Python 2 code (print statements).
    from wand.image import Image
    from PIL import Image as PI
    import pyocr
    import pyocr.builders
    import io
    tool = pyocr.get_available_tools()[0]
    lang = tool.get_available_languages()[0]  # [0] for english
    req_image = []
    final_text = []
    print "Reading {0}".format(pdffile)
    # Rasterize the PDF at 300 DPI and collect each page as a JPEG blob.
    image_pdf = Image(filename=pdffile, resolution=300)
    image_jpeg = image_pdf.convert("jpeg")
    for img in image_jpeg.sequence:
        img_page = Image(image=img)
        print ("appending image")
        req_image.append(img_page.make_blob("jpeg"))
    print "Generating text"
    # OCR each page blob independently.
    for img in req_image:
        txt = tool.image_to_string(PI.open(io.BytesIO(img)),
                                   lang=lang,
                                   builder=pyocr.builders.TextBuilder())
        final_text.append(txt)
    return final_text
def build_config_info():
    '''Builds configuration information about installed OCR software'''
    infos = []
    for tool in pyocr.get_available_tools():
        infos.append({
            'name': tool.get_name(),
            'langs': tool.get_available_languages(),
        })
    return infos
def extract_text(filename, output_filename):
    # OCR the image *filename* and save the recognized text, UTF-8 encoded,
    # to *output_filename*.
    # NOTE: Python 2 code (trailing print statement; encoded bytes written
    # through a text-mode handle).
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print("No OCR tool found")
        sys.exit(1)
    # The tools are returned in the recommended order of usage
    tool = tools[0]
    print("Will use tool '%s'" % (tool.get_name()))
    langs = tool.get_available_languages()
    print("Available languages: %s" % ", ".join(langs))
    lang = langs[0]
    print("Will use lang '%s'" % (lang))
    txt = tool.image_to_string(
        Image.open(filename),
        lang=lang,
        builder=pyocr.builders.TextBuilder()
    )
    with open(output_filename, 'w') as txtfile:
        txtfile.write(txt.encode("UTF-8"))
    print "Text contents saved as '{}'".format(output_filename)
def picture2text(self, picture):
    """OCR *picture* as Japanese text (tesseract layout 6 = single uniform
    block) and store the result on self.txt."""
    ocr_tool = pyocr.get_available_tools()[0]
    jp_builder = pyocr.builders.TextBuilder(tesseract_layout=6)
    self.txt = ocr_tool.image_to_string(picture, lang="jpn",
                                        builder=jp_builder)
def get_named_ocr_tool(toolname):
    '''For a given tool name, this function will return an ocr tool'''
    # Return the first tool whose name matches, or None if absent.
    return next(
        (tool for tool in pyocr.get_available_tools()
         if tool.get_name() == toolname),
        None,
    )
def GetLanguages(cls):
    """Return the languages supported by the 'Tesseract (sh)' backend as a
    '+'-joined string (e.g. 'eng+fra'), or '' when that backend is absent."""
    lang = ''
    # Initialize before the loop: previously `ocr` was only bound inside the
    # loop, so `if ocr:` raised NameError whenever no 'Tesseract (sh)' tool
    # was installed.
    ocr = None
    for tool in pyocr.get_available_tools():
        if tool.get_name() == 'Tesseract (sh)':
            ocr = tool
            break
    if ocr:
        lang = '+'.join(ocr.get_available_languages())
    return lang
def __init__(self): #初始化浏览器和ocr tool self.driver = webdriver.Firefox(executable_path='./geckodriver') tools = pyocr.get_available_tools() if len(tools) == 0: sys.exit(1) self.tool = tools[0] self.im = None self.path = None self.crop_xy = [(35, 300, 770, 610), (35, 550, 770, 720), (35, 730, 770, 830), (35, 840, 770, 940)]
def ocr_recipe_name(coords, img):
    """OCR the recipe-name region of *img* (cropped to *coords*) and return
    a filesystem-friendly lowercase identifier."""
    region = img.crop(coords)
    ocr_tool = pyocr.get_available_tools()[0]
    ocr_lang = ocr_tool.get_available_languages()[2]
    raw_name = ocr_tool.image_to_string(
        region,
        lang=ocr_lang,
        builder=pyocr.builders.TextBuilder()
    )
    # Same single-character scrubbing as the chained .replace() calls, done
    # in one pass: drop  " ( ) !  and map space and / to underscores.
    scrub = str.maketrans({'"': '', ' ': '_', '/': '_',
                           '(': '', ')': '', '!': ''})
    return raw_name.translate(scrub).lower()
def recognition_phone(phone_img_file):
    # OCR a phone-number image and return (and print) the recognized text.
    # NOTE: Python 2 code (print statement).
    tools = pyocr.get_available_tools()
    tool = tools[0]
    langs = tool.get_available_languages()
    # NOTE(review): index 1 picks the SECOND available language -- which one
    # that is depends on the local Tesseract install; confirm it is intended.
    lang = langs[1]
    phone_text = tool.image_to_string(Image.open(phone_img_file),
                                      lang=lang,
                                      builder=pyocr.builders.TextBuilder())
    print phone_text
    return phone_text
def __init__(self):
    """Locate the 'Tesseract (sh)' backend; record it and its languages.

    self.ocr / self.lang stay None when that backend is not installed.
    """
    self.ocr = None
    self.lang = None
    for candidate in pyocr.get_available_tools():
        if candidate.get_name() == 'Tesseract (sh)':
            self.ocr = candidate
            break
    if self.ocr:
        self.lang = '+'.join(self.ocr.get_available_languages())
def ptoi(name):
    """OCR the image file *name* and return the recognized text.

    The image is binarized (mode '1') and a copy is saved to tmp.bmp, as
    before.  The pointless ``open(name, 'r')`` wrapper -- which opened the
    binary image file in text mode and never used the handle --
    has been removed; Image.open() reads the file itself.
    """
    img = Image.open(name)
    img = img.convert('1')  # 1-bit black & white
    img.save('tmp.bmp')
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print('No OCR tool found!')
        sys.exit(1)
    print("Using '%s'" % (tools[0].get_name()))
    return tools[0].image_to_string(img)
def image_to_string(args):
    """Worker: OCR one scratch-directory image, auto-rotating it first when
    the backend can detect page orientation (best effort)."""
    img, lang = args
    ocr = pyocr.get_available_tools()[0]
    scratch_path = os.path.join(Consumer.SCRATCH, img)
    with Image.open(scratch_path) as f:
        if ocr.can_detect_orientation():
            try:
                detected = ocr.detect_orientation(f, lang=lang)
                f = f.rotate(detected["angle"], expand=1)
            except TesseractError:
                # Keep the unrotated image when detection fails.
                pass
        return ocr.image_to_string(f, lang=lang)
def check_required_software():
    """Verify at least one OCR backend is installed.

    Raises PyOCRIntegrationNoOCRFound when none is found; merely logs a
    hint when only one is available (results are better with both
    Tesseract and Cuneiform installed).
    """
    logger = logging.getLogger(__name__)
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        # The message was previously cut off mid-sentence ("Make sure it's
        # on"); completed to match the wording used elsewhere in the project,
        # with the missing space before "PATH" added.
        raise PyOCRIntegrationNoOCRFound('No OCR tool has been found on '
                                         'this system. Make sure it\'s on '
                                         'PATH variable of your system')
    elif len(tools) == 1:
        logger.info("I've found only one ocr tool [%s]. This is not exactly "
                    "an error but you should get better results if you have "
                    "both Tesseract and Cuneiform installed"
                    % tools[0].get_name())
    else:
        logger.info("I've found all required software. We're good to go =)")
def ocr(self, img, angles=None):
    """
    Returns immediately. Listen for the signal ocr-done to get the result
    """
    # Skip OCR entirely (angles=0) when disabled in config or when no OCR
    # backend is installed; otherwise try 4 rotations by default.
    if (not self.__config['ocr_enabled'].value
            or len(pyocr.get_available_tools()) == 0):
        angles = 0
    elif angles is None:
        angles = 4
    # Force the pixel data to be loaded before handing it to the worker.
    img.load()
    job = self.factories['ocr'].make(img, angles)
    self.schedulers['ocr'].schedule(job)
    return job
def get_default_ocr_lang():
    # Guess the best OCR language from the system locale; fall back to the
    # project default when no tool is installed or no code matches.
    ocr_tools = pyocr.get_available_tools()
    if not ocr_tools:
        return DEFAULT_OCR_LANG
    ocr_langs = ocr_tools[0].get_available_languages()
    lang = find_language()
    # Try the ISO 639-3 code first, then the terminology code.
    for attr in ('iso639_3_code', 'terminology'):
        code = getattr(lang, attr, None)
        if code is not None and code in ocr_langs:
            return code
    return DEFAULT_OCR_LANG
def ocr_img(self, imgname):
    """OCR *imgname* as English text, print it and return it.

    To use a non-standard language pack named foo.traineddata, set the
    TESSDATA_PREFIX environment variable so the file can be found at
    TESSDATA_PREFIX/tessdata/foo.traineddata and give Tesseract the
    argument -l foo.
    """
    ocr_tool = pyocr.get_available_tools()[0]
    recognized = ocr_tool.image_to_string(
        Image.open(imgname),
        lang='eng',
        builder=pyocr.builders.TextBuilder(),
    )
    print(recognized)
    return recognized
def getscreenimage():
    # Fetch the TV's current screen image over the UDAP HTTP API, OCR the
    # four input-label slots into the module-level `lgtvch` cache, and
    # return the 1-based index of the highlighted input, or None on failure.
    # NOTE: Python 2 code (httplib / StringIO).
    conn = httplib.HTTPConnection(lgtv["ipaddress"], port=8080)
    conn.request("GET", "/udap/api/data?target=screen_image")
    httpResponse = conn.getresponse()
    if httpResponse.reason != 'OK':
        return None
    htmlout = httpResponse.read()
    im = Image.open(StringIO.StringIO(htmlout)).convert('RGB')
    # ocr
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print("No OCR tool found")
        sys.exit(1)
    tool = tools[0]
    lang = 'eng'
    # crop and ocr: four fixed 120px-wide label boxes, spaced 140px apart.
    im_crop = {}
    for x in range(0, 4):
        box = ((65 + (140 * x)), 394, (65 + 120 + (140 * x)), 416)
        im_crop[x] = im.crop(box)
        ocrtxt = tool.image_to_string(im_crop[x], lang=lang,
                                      builder=pyocr.builders.TextBuilder())
        lgtvch[x] = ocrtxt
        #print ocrtxt
    # ocr out
    # TV
    # HDMH
    # RGBiPC
    # HDMIZ
    # get ch -- presumably the active input is marked with a reddish pixel
    # at y=430 (r<180, g<50, b<60); verify the thresholds against the TV UI.
    ch = ''
    for x in range(1, 5):
        r, g, b = im.getpixel(((130 * x), 430))
        if r < 180 and g < 50 and b < 60:
            ch = x
            break
    if ch:
        return ch
    else:
        return None
def pdf_to_text(filename):
    """Rasterize *filename* at 500 DPI, OCR every page, and return the text
    of the FIRST page only."""
    ocr_tool = pyocr.get_available_tools()[0]
    ocr_lang = ocr_tool.get_available_languages()[0]
    pdf_image = Image(filename=filename, resolution=500, format='pdf')
    jpeg_pages = pdf_image.convert('jpeg')
    # One standalone JPEG blob per page.
    blobs = [Image(image=page).make_blob('jpeg')
             for page in jpeg_pages.sequence]
    texts = []
    for blob in blobs:
        texts.append(ocr_tool.image_to_string(
            PI.open(io.BytesIO(blob)),
            lang=ocr_lang,
            builder=pyocr.builders.TextBuilder()))
    return texts[0]
def imgOCR(path, filename):
    """OCR a Japanese document image and return the recognized text.

    The text is also echoed to stderr, as before.
    """
    # 1. Make sure the installed Tesseract binary is on PATH.
    path_tesseract = "C:\\Program Files\\Tesseract-OCR"
    if path_tesseract not in os.environ["PATH"].split(os.pathsep):
        os.environ["PATH"] += os.pathsep + path_tesseract
    # 2. Grab the OCR engine.
    ocr_engine = pyocr.get_available_tools()[0]
    # 3. Load the source image.
    img_org = Image.open(path + filename)
    # 4. Run OCR.
    result = ocr_engine.image_to_string(
        img_org, lang="jpn", builder=pyocr.builders.TextBuilder())
    sys.stderr.write(result + '\n')
    return result
def __init__(self):
    """Pick the 'Tesseract (sh)' backend out of the available OCR tools.

    Exits the process when no tools are installed or the wanted backend
    is missing.
    """
    # there are three possible tools
    self.tools = pyocr.get_available_tools()
    self.maxsize = 1028
    if len(self.tools) == 0:
        print("No OCR tools found")
        sys.exit(1)
    tool_names = [tool.get_name() for tool in self.tools]
    try:
        index = tool_names.index('Tesseract (sh)')
    except ValueError:  # was a bare except; list.index only raises ValueError
        print("exception happened")
        sys.exit(1)
    self.tool = self.tools[index]
    # The two lines below previously referenced the loose name `tool`,
    # which is undefined on Python 3 (comprehension scope) and, on
    # Python 2, pointed at the LAST tool instead of the selected one.
    print("Will use tool '%s'" % (self.tool.get_name()))
    self.langs = self.tool.get_available_languages()
def OCR():
    """Run Japanese OCR over img/test.jpeg, storing the result in the
    module-level ``txt`` (also printed)."""
    global txt
    print('OCR start')
    ###OCR###
    available = pyocr.get_available_tools()
    if not available:
        #print("No OCR tool found")
        sys.exit(1)
    ocr_tool = available[0]
    #print("Will use tool '%s'" % (ocr_tool.get_name()))
    langs = ocr_tool.get_available_languages()
    #print("Available languages: %s" % ", ".join(langs))
    txt = ocr_tool.image_to_string(Image.open('img/test.jpeg'),
                                   lang='jpn',
                                   builder=pyocr.builders.TextBuilder())
    print(txt)
def imagetotext(self):
    """OCR self.filepath with the configured language and write the result
    to a file named after the image (extension stripped), UTF-8 encoded."""
    tool = pyocr.get_available_tools()[0]
    builder = pyocr.builders.TextBuilder()
    # Reuse the builder created above -- the original constructed a second,
    # identical TextBuilder inline here, leaving the first one dead until
    # write_file().
    txt = tool.image_to_string(Image.open(self.filepath),
                               lang=self.ocrlang,
                               builder=builder)
    #strip picture file extension (lower- and upper-case variants)
    self.outputname = self.filepath.replace('.png', '').replace(
        '.jpg', '').replace('.gif', '').replace('.tiff', '')
    self.outputname = self.outputname.replace('.PNG', '').replace(
        '.JPG', '').replace('.GIF', '').replace('.TIFF', '')
    #write to output file
    with codecs.open(self.outputname, 'w', encoding='utf-8') as file_descriptor:
        builder.write_file(file_descriptor, txt)
def image_to_text(img_name):
    """OCR *img_name* as English text and return the recognized string."""
    available = pyocr.get_available_tools()
    if not available:
        print('No OCR tool found')
        sys.exit(1)
    ocr_tool = available[0]
    #print('Use tool {}'.format(ocr_tool.get_name()))
    langs = ocr_tool.get_available_languages()
    #print('Available languages are {}'.format(langs))
    #If you want japanese version, change 'eng' to 'jpn'
    return ocr_tool.image_to_string(
        Image.open(img_name),
        lang='eng',
        builder=pyocr.builders.TextBuilder())
def verify_text_present_on_image(name, text):
    """Screenshot the screen as *name*, OCR it, and raise (embedding the
    screenshot into the Robot Framework log) when *text* is absent."""
    take_screen_shot(name)
    name = os.getcwd() + "/images/" + name + '.PNG'
    ocr_tool = pyocr.get_available_tools()[0]
    ocr_lang = ocr_tool.get_available_languages()[0]
    rendered_text = ocr_tool.image_to_string(
        Image.open(name),
        lang=ocr_lang,
        builder=pyocr.builders.TextBuilder())
    #print (str(rendered_text))
    print('text=%s' % rendered_text)
    if rendered_text.find(text) == -1:
        # Attach the failing screenshot to the RF log before raising.
        screenshotLib = BuiltIn().get_library_instance('Screenshot')
        screenshotLib._embed_screenshot(name, 200)
        raise Exception('The text %s is not displayed in the image' % text)
def image_to_string(args):
    """
    I have no idea why, but if this function were a method of Consumer, it
    would explode with: `TypeError: cannot serialize '_io.TextIOWrapper'
    object`.
    """
    png, lang = args
    ocr = pyocr.get_available_tools()[0]
    scratch_path = os.path.join(Consumer.SCRATCH, png)
    with Image.open(scratch_path) as f:
        if ocr.can_detect_orientation():
            # Best effort: keep the unrotated image when detection fails.
            try:
                detected = ocr.detect_orientation(f, lang=lang)
                f = f.rotate(detected["angle"], expand=1)
            except TesseractError:
                pass
        return ocr.image_to_string(f, lang=lang)
def put(self, args):
    """Store a document in the database, extracting searchable text first.

    PDF text is extracted via the external `pdftotext` binary, image text
    via the first available pyocr backend, and text files are stored as-is.
    Returns 1 on failure (unsupported type / missing tooling), None on
    success.
    """
    doc = args.document
    title = args.title if args.title else os.path.basename(doc.name)
    labels = args.labels
    doc_raw = doc.read()
    mime_type = magic.from_buffer(doc_raw, mime=True)
    if (type(doc_raw) is str):
        # This is the case for stdin
        doc_blob = sqlite3.Binary(
            bytearray(doc_raw, locale.getdefaultlocale()[1]))
    else:
        doc_blob = sqlite3.Binary(doc_raw)
    if mime_type == 'application/pdf':
        if (shutil.which('pdftotext')):
            # '-' makes pdftotext write the extracted text to stdout.
            doc_text = subprocess.run(['pdftotext', doc.name, '-'],
                                      stdout=subprocess.PIPE).stdout
        else:
            print(
                'Cannot put PDF file, please make sure `pdftotext` is in your path.'
            )
            return 1
    elif mime_type.startswith('image/'):
        tools = pyocr.get_available_tools()
        if len(tools) == 0:
            print('Cannot put image file, could not find any OCR tool.')
            return 1
        tool = tools[0]
        print('Using `{}` for OCR'.format(tool.get_name()))
        doc_text = tool.image_to_string(
            Image.open(doc), builder=pyocr.builders.TextBuilder())
    elif mime_type.startswith('text/'):
        doc_text = doc_blob
    else:
        print('Unsupported mime type "{}"'.format(mime_type))
        return 1
    put_sql = 'INSERT INTO documents(title, labels, mime, text_data, raw_data) VALUES (?, ?, ?, ?, ?)'
    self.cursor.execute(put_sql,
                        (title, labels, mime_type, doc_text, doc_blob))
    self.conn.commit()
    print('{} has been added to faldone'.format(title))
def getText():
    # OCR the before/after stat screenshots as digit strings, compute the
    # per-stat gains, and accept (right click) or reject (left click) the
    # growth based on a weighted total.
    tools = pyocr.get_available_tools()
    tool = tools[0]
    langs = tool.get_available_languages()
    # Post-growth stats (four numbers expected).
    param = tool.image_to_string(
        Image.open(paramImg),
        lang='eng',
        builder=pyocr.builders.DigitBuilder()).split()
    # Pre-growth stats.
    preParam = tool.image_to_string(
        Image.open(preParamImg),
        lang='eng',
        builder=pyocr.builders.DigitBuilder()).split()
    # Retry (re-screenshot) until all four post-growth values are read.
    while (len(param) != 4):
        time.sleep(0.3)
        print('成長後のステが取れなかったのでもう一度')
        getSS()
        param = tool.image_to_string(
            Image.open(paramImg),
            lang='eng',
            builder=pyocr.builders.DigitBuilder()).split()
    print(preParam)
    print(param)
    # Gains per stat, in screenshot order.
    strength = int(param[0]) - int(preParam[0])
    agile = int(param[1]) - int(preParam[1])
    inte = int(param[2]) - int(preParam[2])
    physical = int(param[3]) - int(preParam[3])
    # Weighted score: strength and physical carry the most weight.
    total = strength * 4 + physical * 3 + agile + inte
    print(total)
    if (total >= 0):
        print(total)
        print('保存')
        clickRight()
        # The result screen appears and is in the way, so wait 3 seconds.
        time.sleep(3)
    else:
        print(total)
        print('閉じる')
        clickLeft()
def main():
    # Capture frames from the Pi camera, recognize characters with the SVM
    # model + OCR backend, and type the text via Hitman; loops until an
    # exception occurs.
    # Load the OCR backend.
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print("No OCR tool found")
        sys.exit(1)
    tool = tools[0]
    hitman = Hitman()
    hitman.initialize()
    # Pre-trained SVM model.
    clf = joblib.load('model.pkl')
    margin = 7
    i = 0
    # initialize the camera and grab a reference to the raw camera capture
    camera = PiCamera()
    camera.resolution = (1408, 944)
    # camera.resolution = (2592, 1952)
    # camera.resolution = (3280, 2464)
    try:
        camera.contrast = 70
        camera.start_preview()
        time.sleep(5)
        camera.stop_preview()
        a = input('Ready. Please press return to start.')
        while True:
            with PiRGBArray(camera) as stream:
                camera.capture(stream, format='bgr')
                # At this point the image is available as stream.array
                img = stream.array
                # img = frame.array
                if GPIO.input(14) == GPIO.HIGH:
                    # Emergency escape ("wizard mode"): in this typing game,
                    # entering a comma skips the current character.
                    print('ウィザードモード発動!')
                    hitman.hit_keys(',', 0.01)
                    continue
                txt = extract_characters(img, clf, margin, tool)
                hitman.hit_keys(txt, 0.02)
                print('end. next!!!!!!!!!!!!!!!!!')
                # On success the screen transition takes a moment.
                time.sleep(0.2)
                #i += 1
    except:
        # NOTE(review): bare except ends the loop on ANY error (including
        # KeyboardInterrupt); at least the traceback is printed.
        print(traceback.format_exc())
def show_frame():
    """Grab one webcam frame, detect cars inside a fixed ROI, OCR each
    detection as digits into the Tkinter text widget, and refresh the
    preview; reschedules itself every 10 ms."""
    _, frame = cap.read()
    # Fixed region of interest where plates are expected.
    cv2.rectangle(frame, (100, 470), (500, 400), (255, 0, 0), 3)
    ro = frame[400:470, 100:500]
    gray = cv2.cvtColor(ro, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    gray = cv2.bilateralFilter(gray, 9, 75, 75)
    gray = cv2.equalizeHist(gray)
    cars = car_cascade.detectMultiScale(gray, 1.1, 1)
    for (x, y, w, h) in cars:
        cv2.rectangle(ro, (x, y), (x + w, y + h), (0, 0, 255), 2)
        img3 = ro[y:y + h, x:x + w]
        con = np.array([[[x, y]], [[x + w, y]], [[x + w, y + h]], [[x, y + h]]])
        # Denoise + binarize the detection before OCR.
        img4 = cv2.medianBlur(img3, 3)
        img4 = cv2.threshold(img4, 127, 255, cv2.THRESH_BINARY)[1]
        img4 = cv2.medianBlur(img4, 3)
        img4 = cv2.cvtColor(img4, cv2.COLOR_BGR2GRAY)
        cv2.imwrite('l3.jpg', img4)
        imh = Image.fromarray(img4)
        # Use the first available backend. The original bound the tool to a
        # name called `tools` and -- worse -- bound the OCR result to `str`,
        # shadowing the builtin; both renamed.
        ocr_tool = pyocr.get_available_tools()[0]
        digits_text = ocr_tool.image_to_string(
            imh, builder=pyocr.builders.DigitBuilder())
        t.delete("1.0", tk.END)
        t.insert(tk.END, digits_text)
        t.pack(side="left", padx=5, pady=5)
        img4tk = ImageTk.PhotoImage(image=imh)
        l1.img4tk = img4tk
        l1.configure(image=img4tk)
    # Refresh the main preview widget with the annotated frame.
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)
    imgtk = ImageTk.PhotoImage(image=img)
    lmain.imgtk = imgtk
    lmain.configure(image=imgtk)
    lmain.after(10, show_frame)
def __init__(self, url):
    """
    The constructor for PYOCR class.

    Parameters:
        url (string): The path to the chosen jpg file.
    """
    super(PYOCR, self).__init__(url)
    # Point pyocr at the configured tesseract binary before probing tools.
    pyocr.tesseract.TESSERACT_CMD = self.OCR_path
    self.tools = pyocr.get_available_tools()
    if not self.tools:
        print("No OCR tool found")
        sys.exit(1)
    self.tool = self.tools[0]
    self.langs = self.tool.get_available_languages()
    self.url = url
    # Temporary image that will be used to scan for text.
    self.temp_src = '../Images/Natural Images/temp.jpg'
    self.i = 0
    self.result = None
def get_individual_id(self, img_path: str) -> str:
    """OCR a cattle-tag photo and return the first 10-digit individual ID
    found.

    :param img_path: path to the image to scan
    :return: the 10-digit ID string, or None when nothing matches
    """
    # setting pyocr
    tools = pyocr.get_available_tools()
    tool = tools[0]
    # text detection (Japanese model; layout 6 = single uniform block)
    txt = tool.image_to_string(
        Image.open(img_path),
        lang="jpn",
        builder=pyocr.builders.TextBuilder(tesseract_layout=6),
    )
    # get wagyu id -- raw string fixes the invalid-escape warning the plain
    # "\d{10}" literal triggers on modern Python.
    match = re.findall(r"\d{10}", txt)
    if match:
        return match[0]
    # NOTE(review): returns None despite the `-> str` annotation, matching
    # the original fall-through; callers appear to rely on the falsy result.
    return None
def convert_to_txt(img_files):
    # Binarize each image and OCR it as Japanese text, printing the result.
    # get OCR tools you installed
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print('no tools')
        return
    # use Tesseract (first of tools list)
    tool = tools[0]
    for img_file in img_files:
        # read as grayscale image
        gray_img = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
        # convert to binary image (fixed threshold of 220)
        _, binary_img = cv2.threshold(gray_img, 220, 255, cv2.THRESH_BINARY)
        txt = tool.image_to_string(image=Image.fromarray(binary_img),
                                   lang='jpn',
                                   builder=pyocr.builders.TextBuilder(tesseract_layout=6))
        print(txt)
        # NOTE(review): unconditional break -- only the FIRST file is ever
        # processed. Looks like leftover debugging; confirm before removing.
        break
def scan_text(image):
    """OCR *image* and return line boxes (LineBoxBuilder, tesseract layout
    3 = fully automatic page segmentation)."""
    tool = pyocr.get_available_tools()[0]  # tesseract
    # tool = pyocr.get_available_tools()[1]  # libtesseract
    # tool = pyocr.get_available_tools()[2]  # cuneiform
    # lang = tool.get_available_languages()[1]
    lang = 'eng'
    # The original constructed a TextBuilder here and immediately overwrote
    # it on the next line; the dead assignment has been removed.
    builder = pyocr.builders.LineBoxBuilder(tesseract_layout=3)
    # builder.tesseract_flags.append("-c")
    # builder.tesseract_flags.append("preserve_interword_spaces=1")
    return tool.image_to_string(
        # image.convert('1', dither = PI.NONE),
        image,
        lang=lang,
        builder=builder
    )
def compute_pdf_text_ocr(self):
    """Rasterize self.path at 300 DPI, OCR every page, and store the list
    of per-page strings on self.raw_pages."""
    ocr_tool = pyocr.get_available_tools()[0]
    ocr_lang = ocr_tool.get_available_languages()[1]
    pdf_image = Image(filename=self.path, resolution=300)
    jpeg_pages = pdf_image.convert('jpeg')
    # One standalone JPEG blob per page.
    blobs = [Image(image=page).make_blob('jpeg')
             for page in jpeg_pages.sequence]
    pages = []
    for blob in blobs:
        pages.append(ocr_tool.image_to_string(
            PI.open(io.BytesIO(blob)),
            lang=ocr_lang,
            builder=pyocr.builders.TextBuilder()))
    self.raw_pages = pages
def lolObserver():
    # Watch the game client: roughly once per second, screenshot the screen,
    # check the champion level boxes, and publish the updated state as JSON
    # into the module-level `response`.  Runs forever.
    global response
    config = json.load(open(configName, 'r'))
    # Origin of the game client window (usually [0, 0]).
    game_pos_offset = config["game_pos_offset"]
    # Geometry of the level indicator boxes.
    level_pos_offset = config["level_pos_offset"]
    level_box_size = config["level_box_size"]
    level_X_interval = config["level_X_interval"]
    level_Y_interval = config["level_Y_interval"]
    level_boxes = levelBoxes(game_pos_offset, level_pos_offset,
                             level_X_interval, level_Y_interval,
                             level_box_size)
    champion_state = initState(level_boxes)
    levelImgBuff = initLevelImgBuff(screenShot(), level_boxes)
    # Levels that trigger an automatic notification.
    auto_notice_levels = [6, 11, 16]
    auto_notice_frags = initFrags(auto_notice_levels)
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print('pyocrが見付かりません。pyocrをインストールして下さい。')
    # Single timer fired every dt[0] (= 1) second.
    timers = timeInit(1)
    dt = [1]
    state_update = False
    while True:
        if getTime() - timers[0] > dt[0]:
            img = screenShot()
            levelCheck(img, champion_state, level_boxes, levelImgBuff)
            state_update = True
            timers[0] = getTime()
        if state_update:
            printData(champion_state, auto_notice_levels, auto_notice_frags)
            #printState()
            #printJSON()
            print(data)
            # `data` is presumably populated by printData(); confirm.
            response = [json.dumps(data).encode("utf-8")]
            state_update = False
def get_text_via_ocr(pdf, dpi=300):
    """
    This Function extracts the text present in image based resumes.
    :param pdf: string - name of the pdf document
    :param dpi: dpi value, default 300
    :return: recognized text (ASCII-encoded bytes), or None when no OCR
             tool is installed
    """
    # NOTE(review): this delegates to get_text() whenever the name contains
    # '.pdf' -- i.e. for every normally-named PDF -- so the OCR branch below
    # only runs for oddly-named inputs. Confirm the condition is intended.
    if '.pdf' in pdf:
        return get_text(pdf)
    finaltext = ''
    try:
        tool = pyocr.get_available_tools()[0]
        lang = 'eng'
    except IndexError:
        # No OCR backend installed.
        return
    req_image = []
    # Rasterize the PDF and collect one JPEG blob per page.
    image_pdf = Image(resolution=dpi, filename=pdf)
    image_jpeg = image_pdf.convert('jpeg')
    for img in image_jpeg.sequence:
        img_page = Image(image=img)
        try:
            req_image.append(img_page.make_blob('jpeg'))
        except BlobError:
            # Give up at the first page that cannot be encoded.
            return finaltext
    # OCR each page and concatenate the text.
    for img in req_image:
        txt = tool.image_to_string(PI.open(io.BytesIO(img)),
                                   lang=lang,
                                   builder=pyocr.builders.TextBuilder())
        finaltext = finaltext + txt
    # Drop references to the large intermediates explicitly.
    del image_jpeg.sequence
    del image_pdf
    del image_jpeg
    del img_page
    del txt
    # NOTE(review): the ASCII re-encode makes the return value *bytes*,
    # not str; callers must expect bytes.
    asciidata = finaltext.encode("ascii", "ignore")
    finaltext = asciidata
    return finaltext
def parse_image(self, filepath):
    """OCR *filepath* as simplified Chinese (tesseract layout 7 = single
    text line) and return the text; returns the string "null" on any
    failure or empty result."""
    try:
        tools = pyocr.get_available_tools()
        if len(tools) == 0:
            # Previously this only printed and then crashed on tools[0]
            # (the IndexError was swallowed by the broad except below);
            # bail out explicitly instead.
            print("No OCR tool found")
            return "null"
        tool = tools[0]
        txt = tool.image_to_string(
            Image.open(filepath),
            lang="chi_sim",
            builder=pyocr.builders.TextBuilder(7)
        )
        if txt is None:
            txt = "null"
        return txt
    except Exception as e:  # was Python-2-only `except Exception , e` syntax
        print(e)
        return "null"
def image_to_num(c2):
    # OCR a single digit out of the image patch *c2* (array-like accepted
    # by Image.fromarray).
    c4 = Image.fromarray(c2)
    tools = pyocr.get_available_tools()
    tool = tools[0]
    builder = pyocr.builders.DigitBuilder()
    # Set Page Segmentation mode to Single Char :
    builder.tesseract_layout = 10  # If tool = tesseract
    # NOTE(review): '-psm 6' (uniform block) contradicts layout 10 above and
    # uses the pre-Tesseract-4 flag spelling; confirm which backend this
    # actually targets.
    builder.tesseract_flags = ['-psm', '6']  # If tool = libtesseract
    text = tool.image_to_string(c4, lang="eng", builder=builder)
    #text = pytesseract.image_to_string(c4,lang='eng')
    #print("num:",text)
    # NOTE(review): returns the *int* 0 for "no digit" but a *string*
    # otherwise; callers must handle both types.
    if text == '.' or text == '' or len(text) > 1:
        text = 0
    return text
def GenerateDoc(source_pdfs, folder, jpg, pages, variable_match):
    # For each configured region in *variable_match*, crop that area out of
    # the page image *jpg*, OCR it as digits, extract the document number
    # with the region's regex, and append everything to the CSV report.
    from os import system
    import re
    import pyocr
    import pyocr.builders
    from PIL import Image
    # Get input file path: the jpg basename starts with "<source-index>_".
    startnum = jpg.rfind('/')
    endnum = jpg.find('_', startnum)
    id_file = int(jpg[startnum + 1:endnum])
    inputfile = source_pdfs[id_file]
    inputfile = inputfile[1]
    # inputpdf = PdfFileReader(open(inputfile, "rb"), strict=False)
    row_data_csv = [id_file, inputfile, pages]
    for i in range(len(variable_match)):
        # variable_match[i][1]/[2] are presumably the two crop corner
        # coordinates and [3] the doc-number regex -- confirm the schema.
        cropimage(folder, jpg, variable_match[i][1][0], variable_match[i][1][1],
                  variable_match[i][2][0], variable_match[i][2][1],
                  'id_' + str(i))
        jpg_name = jpg[startnum + 1:]
        jpg_num = folder + 'croped/' + 'id_' + str(i) + jpg_name
        # Convert image into text mode
        tools = pyocr.get_available_tools()[0]
        text = tools.image_to_string(Image.open(jpg_num),
                                     builder=pyocr.builders.DigitBuilder())
        # Generate Doc Number
        # doc_num = re.findall(r'\d+/\d+', text_num)
        doc_num = re.findall(variable_match[i][3], text)
        # if doc_num is None:
        if len(doc_num) == 0:
            doc_num = 'Not Found'
        # Saving pdf and creating report
        row_data_csv.append(doc_num)
    GenerateReport(row_data_csv, False)
def Mod_OCR(DIGIT_IMAGE, DATE):
    """OCR a seven-segment display image with the 'letsgodigital' model and
    append [DATE, reading] as a row to data.csv."""
    # Run OCR using the letsgodigital trained data.
    ocr_tool = pyocr.get_available_tools()[0]
    source_img = Image.open(DIGIT_IMAGE)
    text_builder = pyocr.builders.TextBuilder(tesseract_layout=6)
    reading = ocr_tool.image_to_string(source_img,
                                       lang="letsgodigital",
                                       builder=text_builder)
    # Append the reading with its date stamp to the CSV log.
    with open('data.csv', 'a') as csv_file:
        writer = csv.writer(csv_file, lineterminator='\n')
        writer.writerow([DATE, reading])
    return
def main():
    """OCR images/fake_1.png … fake_15.png with eng+jpn and print each
    result."""
    available = pyocr.get_available_tools()
    if not available:
        print("No OCR tool found")
        sys.exit(1)
    # The tools are returned in the recommended order of usage
    ocr_tool = available[0]
    print("Will use tool '%s'" % (ocr_tool.get_name()))
    langs = ocr_tool.get_available_languages()
    print("Available languages: %s" % ", ".join(langs))
    for i in range(1, 16):
        file_name = './images/fake_{}.png'.format(str(i))
        txt = ocr_tool.image_to_string(
            Image.open(file_name),
            lang="eng+jpn",
            builder=pyocr.builders.TextBuilder(tesseract_layout=6))
        print("--{}--".format(file_name))
        print(txt)
def optical_character_recognition(pillow_image):
    """
    Run OCR on a Pillow-loaded image and return the cleaned Japanese text:
    commas, spaces and periods (ASCII and fullwidth) removed, backslashes
    replaced with the yen sign.
    """
    available = pyocr.get_available_tools()
    if not available:
        print('Available tool is not found.')
    ocr_tool = available[0]
    txt = ocr_tool.image_to_string(
        pillow_image,
        lang = 'jpn',
        builder = pyocr.builders.TextBuilder(tesseract_layout=3)
    )
    # Drop the separator characters in one pass, then map '\' to '¥'.
    edit_txt = txt.translate(str.maketrans({',':None, ' ':None, '.':None, '。':None}))
    out_txt = re.sub(r'\\', '¥', edit_txt)
    return out_txt
def __init__(self, *args, **kwargs):
    # Select an OCR backend, preferring pyocr's libtesseract binding when
    # available, and cache the language list.
    super(PyOCR, self).__init__(*args, **kwargs)
    self.languages = ()
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        raise OCRError('No OCR tool found')
    self.tool = tools[0]
    # The tools are returned in the recommended order of usage
    # pyocr tools are modules, hence the __name__ comparison below.
    for tool in tools:
        if tool.__name__ == 'pyocr.libtesseract':
            self.tool = tool
    logger.debug('Will use tool \'%s\'', self.tool.get_name())
    self.languages = self.tool.get_available_languages()
    logger.debug('Available languages: %s', ', '.join(self.languages))
def main(): args = parser.parse_args() if args.file_name is None: parser.print_usage() return elif args.invert not in ("yes", "no", "auto"): print "%s is not an invert mode." % (args.lang, ) print "Options are: %s" % (", ".join(("yes", "no", "auto")), ) return 1 else: ocr_tools = pyocr.get_available_tools() if args.lang not in ocr_tools[0].get_available_languages(): print "%s is not an available language." % (args.lang, ) print "Options are: %s" % (", ".join( ocr_tools[0].get_available_languages()), ) return 1 process_file(args.file_name, ocr_tools, args.lang, args.forcedonly, args.invert, args.background)
def old_solve(): """ pyocr 库识别, 识别率太低 """ tools = pyocr.get_available_tools() tool = tools[0] # Ex: Will use tool 'tesseract' print("Will use tool '%s'" % (tool.get_name())) language = tool.get_available_languages() print("Available languages: %s" % ", ".join(language)) language = language[1] # Ex: Will use lang 'fra' print("Will use language '%s'" % (language)) for i in range(10001): picture = Image.open("cap1/im{}.png".format(i)) picture = online_rotate_picture(picture) txt = tool.image_to_string(picture, builder=pyocr.builders.TextBuilder()) print(txt)
def __init__(self):
    """Pick the first OCR backend and select 'eng' when available,
    otherwise the backend's first language."""
    preferred = 'eng'
    available = pyocr.get_available_tools()
    if not available:
        print("No OCR tool found")
        sys.exit(1)
    self.tool = available[0]
    print("OCR tool: %s" % self.tool)
    try:
        langs = self.tool.get_available_languages()
        self.lang = preferred if preferred in langs else langs[0]
        print("OCR selected language: %s (available: %s)"
              % (self.lang.upper(), ", ".join(langs)))
    except Exception as e:
        print("{}".format(e))
def extract_number_from_one_orientation(im, orientation,lang='fra'):
    # OCR *im* after rotating it by *orientation* degrees; returns the
    # recognized text, or None when nothing was recognized.
    import pyocr
    import pyocr.builders
    from PIL import Image
    import numpy as np
    tools = pyocr.get_available_tools()
    # NOTE(review): index 1 picks the SECOND available backend (every other
    # helper in this file uses [0]) and raises IndexError when only one is
    # installed -- confirm it is intentional.
    tool = tools[1]
    im_r = im.rotate(orientation, expand=True)
    digits = tool.image_to_string(im_r, lang=lang,
                                  # builder=pyocr.builders.WordBoxBuilder()
                                  builder = pyocr.builders.TextBuilder()
                                  )
    if not digits:
        return
    else:
        return digits
def __init__(self, args, imgsrc=None):
    # Configure this OCR wrapper from CLI args; the tesseract tool handle
    # is cached on the `ocr` object so it is only created once.
    self.args = args
    if imgsrc == None:
        self.imgsrc = ocr.imgsrc
    else:
        self.imgsrc = imgsrc
    # Binarization threshold; the --threshold CLI flag overrides the default.
    # self.c_max = 169
    self.c_max = ocr.c_max
    if args.threshold:
        self.c_max = args.threshold
    if ocr.tool != None:
        # Tool already initialized by a previous instance -- nothing to do.
        return
    # Put the installed Tesseract directory on PATH.
    if ocr.path_tesseract not in os.environ["PATH"].split(os.pathsep):
        os.environ["PATH"] += os.pathsep + ocr.path_tesseract
    # Grab the OCR engine (first available backend).
    tools = pyocr.get_available_tools()
    # print(path_tesseract)
    # print(tools)
    ocr.tool = tools[0]
def tesseract_ocr(image_path):
    """OCR *image_path* with the 'digits_comma' model and return line boxes
    (LineBoxBuilder, tesseract layout 6 = single uniform block)."""
    # "Tesseract" is expected at index [0] of the available tools.
    available = pyocr.get_available_tools()
    if not available:
        print ("No Tesseract OCR tool found")
    ocr_tool = available[0]
    # langs = ocr_tool.get_available_languages()
    with Image.open(image_path) as im1:
        box_builder = pyocr.builders.LineBoxBuilder(tesseract_layout=6)
        return ocr_tool.image_to_string(
            im1,
            lang='digits_comma',  # custom digits/comma model
            builder=box_builder
        )
def ocr_image(filepath, threshold=100):
    """Binarize the image at *filepath*, OCR it word-by-word, and return
    ``(joined_text, annotated_rgb_image)``.

    Returns None when pyocr finds no usable OCR backend.
    """
    # Check that an OCR backend is available.
    engines = pyocr.get_available_tools()
    if not engines:
        # print("Cannot use pyocr")
        return
    engine = engines[0]
    # Show the languages the backend supports.
    langs = engine.get_available_languages()
    print("Available languages: %s" % ", ".join(langs))
    lang = 'eng'
    print("Will use lang '%s'" % lang)
    # Preprocessing: grayscale then fixed-threshold binarization.
    binarized = (Image.open(filepath)
                 .convert('L')
                 .point(lambda px: 0 if px < threshold else 255))
    # Extract word boxes (layout mode 7 = treat image as a single text line).
    boxes = engine.image_to_string(
        binarized,
        lang=lang,
        builder=pyocr.builders.WordBoxBuilder(tesseract_layout=7))
    annotated = binarized.convert('RGB')
    draw = ImageDraw.Draw(annotated)
    font = ImageFont.truetype(r"fonts/Apple Symbols.ttf", size=20)
    words = []
    for box in boxes:
        words.append(box.content)
        # Outline each recognized word and label it in red.
        draw.rectangle((box.position[0], box.position[1]),
                       None, (255, 0, 0), 2)
        draw.text((box.position[0][0] + 5, box.position[0][1] + 1),
                  font=font, text=box.content, fill=(255, 0, 0))
    return ''.join(words), annotated
def run_ocr(input_file, output_file):
    """OCR every page of the PDF *input_file* and write the text to *output_file*.

    Args:
        input_file: path to the source PDF.
        output_file: path of the text file to (over)write; each page's text
            is written followed by a newline.

    Raises:
        SystemExit: when no OCR tool is installed.
    """
    print("Running on ocr on " + input_file)
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print("No OCR tool found")
        sys.exit(1)
    # The tools are returned in the recommended order of usage
    tool = tools[0]
    print("Will use tool '%s'" % (tool.get_name()))
    # Ex: Will use tool 'libtesseract'
    langs = tool.get_available_languages()
    # print("Available languages: %s" % ", ".join(langs))
    lang = langs[0]
    print("Will use lang '%s'" % (lang))
    builder = pyocr.builders.TextBuilder()
    req_image = []
    final_text = []
    # open PDF image and convert each page to a 300-dpi JPEG blob
    image_pdf = Image(filename=input_file, resolution=300)
    image_jpeg = image_pdf.convert('jpeg')
    for img in image_jpeg.sequence:
        img_page = Image(image=img)
        req_image.append(img_page.make_blob('jpeg'))
    # loop through each page in required images
    for img in req_image:
        txt = tool.image_to_string(PI.open(io.BytesIO(img)),
                                   lang=lang,
                                   builder=builder)
        final_text.append(txt)
    # BUG FIX: the output file was opened but never closed (leak; output
    # could stay unflushed). A context manager guarantees close on any path.
    with open(output_file, 'w') as out:
        for item in final_text:
            out.write("%s\n" % item)
def outputOCR():
    """OCR the file 'output.png' with the first available tool and return
    everything after the first space of the cleaned-up text.

    Raises:
        SystemExit: when no OCR tool is installed.
        IndexError: when the recognized text contains no space
            (the ``txt[1]`` access below).
    """
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print("No OCR tool found")
        sys.exit(1)
    tool = tools[0]
    #print("Will use tool '%s'" % (tool.get_name()))
    #print("Will use tool '%s'" % (tool.get_name()))
    # Ex: Will use tool 'tesseract'
    langs = tool.get_available_languages()
    #print("Available languages: %s" % ", ".join(langs))
    lang = langs[0]
    #print("Will use lang '%s'" % (lang))
    # Ex: Will use lang 'fra'
    txt = tool.image_to_string(Image.open('output.png'),
                               lang=lang,
                               builder=pyocr.builders.TextBuilder())
    #word_boxes = tool.image_to_string(Image.open('test.png'),
    #                                  lang=lang,
    #                                  builder=pyocr.builders.WordBoxBuilder())
    #line_and_word_boxes = tool.image_to_string(
    #    Image.open('test.png'), lang=lang,
    #    builder=pyocr.builders.LineBoxBuilder())
    # Flatten line breaks into spaces before splitting.
    txt = txt.replace("\r", " ")
    txt = txt.replace("\n", " ")
    # NOTE(review): this replace appears to map a space to a space (no-op) —
    # possibly intended to collapse double spaces; confirm against the
    # original source.
    txt = txt.replace(" ", " ")
    # Split off the first token; only the remainder is returned.
    txt = txt.split(" ", 1)
    #print txt
    #print "================"
    #print word_boxes
    #print "================"
    #print line_and_word_boxes
    # Digits - Only Tesseract
    #digits = tool.image_to_string(Image.open('test-digits.png'),
    #                              lang=lang,
    #                              builder=pyocr.tesseract.DigitBuilder())
    return txt[1]
def getDigits(image, morphology):
    """Binarize *image* (optionally eroding with a 4-neighborhood) and OCR the
    result as digits with Tesseract layout mode 7 (single text line)."""
    gray = image.convert('L')
    width, height = gray.size
    src = gray.load()
    out_img = Image.new('L', (width, height))
    dst = out_img.load()
    neighborhood = [(0, -1), (-1, 0), (1, 0), (0, 1)]
    for row in range(height):
        for col in range(width):
            samples = [(col, row)]
            if morphology:
                # Erosion: also sample the in-bounds 4-neighbors, so a pixel
                # stays white only when its whole neighborhood is white.
                samples += [(col + dx, row + dy)
                            for (dx, dy) in neighborhood
                            if 0 <= col + dx < width and 0 <= row + dy < height]
            # White iff every sampled pixel is near-white (> 250).
            dst[col, row] = (255 if all(src[px, py] > 250
                                        for (px, py) in samples) else 0)
    engine = pyocr.get_available_tools()[0]
    builder = pyocr.tesseract.DigitBuilder(tesseract_layout=7)
    return engine.image_to_string(out_img, lang='eng', builder=builder)
def image_to_text(image):
    """Convert *image* (any wand-readable file, incl. multi-page PDFs) to JPEG
    pages at 300 dpi and OCR each page.

    Args:
        image: path to the input file.

    Returns:
        List of per-page recognized texts; empty when the file cannot
        be opened.
    """
    tool = pyocr.get_available_tools()[0]
    # NOTE(review): hard-coded language index — confirm [2] is the wanted one.
    lang = tool.get_available_languages()[2]
    req_image = []
    final_text = []
    try:
        image_obj = Image(filename=image, resolution=300)
    except Exception:
        # Unreadable input: best-effort, return no text
        # (narrowed from a bare ``except:``).
        return final_text
    # BUG FIX: the original used ``is not 'jpeg'``, an identity comparison
    # that is unreliable for string slices; equality is what was intended.
    if image[-4:] != 'jpeg':
        image_obj = image_obj.convert('jpeg')
    for img in image_obj.sequence:
        img_page = Image(image=img)
        req_image.append(img_page.make_blob('jpeg'))
    for img in req_image:
        txt = tool.image_to_string(
            PI.open(io.BytesIO(img)),
            lang=lang,
            builder=pyocr.builders.TextBuilder()
        )
        final_text.append(txt)
    return final_text