import cv2
from easyocr import Reader


def ocr_image(image_path, langs='en', gpu=-1):
    # break the input languages into a comma-separated list
    langs = langs.split(",")
    print("[INFO] OCR'ing with the following languages: {}".format(langs))

    # load the input image from disk
    image = cv2.imread(image_path)

    # OCR the input image using EasyOCR
    print("[INFO] OCR'ing input image...")
    reader = Reader(langs, gpu=gpu > 0)
    results = reader.readtext(image)
    all_text = ''

    # loop over the results
    for (bbox, text, prob) in results:
        # display the OCR'd text and associated probability
        print("[INFO] {:.4f}: {}".format(prob, text))

        # unpack the bounding box
        (tl, tr, br, bl) = bbox
        tl = (int(tl[0]), int(tl[1]))
        tr = (int(tr[0]), int(tr[1]))
        br = (int(br[0]), int(br[1]))
        bl = (int(bl[0]), int(bl[1]))

        # clean up the text and draw the box surrounding the text along
        # with the OCR'd text itself (cleanup_text is defined in the
        # standalone script near the end of this section)
        text = cleanup_text(text)
        cv2.rectangle(image, tl, br, (0, 255, 0), 2)
        cv2.putText(image, text, (tl[0], tl[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        all_text += ' ' + text

    # resize the image for UI purposes (currently disabled)
    h, w, _ = image.shape
    # image = cv2.resize(image, (700, 700))
    return image, all_text
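# A minimal usage sketch for ocr_image (assumptions: "example.jpg" is a
# hypothetical path, and cleanup_text must be in scope, e.g. the definition
# from the standalone script near the end of this section).
if __name__ == "__main__":
    annotated, extracted = ocr_image("example.jpg", langs="en", gpu=-1)
    print(extracted)
    cv2.imshow("OCR result", annotated)
    cv2.waitKey(0)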
import numpy as np
from PIL import Image
from easyocr import Reader


def caract_ocr(imagem, boxs):
    # convert the numpy array into a PIL image so we can crop regions
    imagem = Image.fromarray(imagem)
    id_caracteres = []

    # create the reader once, outside the per-box loop
    reader = Reader(['en'])

    for i in range(len(boxs)):
        # crop the label region; each box is (x, y, w, h)
        etiqueta = imagem.crop(
            (boxs[i][0], boxs[i][1],
             (boxs[i][0] + boxs[i][2]), (boxs[i][1] + boxs[i][3])))
        display_img(etiqueta)  # external display helper
        etiqueta_w, etiqueta_h = etiqueta.size

        # the factors 0.60 and 0.70 of the image width and height are based
        # on the estimated relative position of the target letters
        letras = etiqueta.crop(
            (etiqueta_w * 0.60, etiqueta_h * 0.70, etiqueta_w, etiqueta_h))
        display_img(letras)

        letras = np.array(letras)
        results = reader.readtext(letras)
        if len(results) != 0:
            id_caracteres.append(results[0][1])
    return id_caracteres
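# Usage sketch for caract_ocr (assumptions: "label.jpg" is a hypothetical
# path, display_img is an external notebook helper stubbed here as a no-op,
# and each box is an (x, y, w, h) tuple; reuses the imports above).
def display_img(img):
    pass  # stand-in for the display helper called inside caract_ocr


imagem = np.array(Image.open("label.jpg"))
print(caract_ocr(imagem, [(10, 10, 200, 80)]))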
import os
from datetime import datetime
from random import randint

import cv2
from easyocr import Reader
from flask import jsonify, request
from werkzeug.utils import secure_filename


def upload_image():
    # use .get so a missing field returns the 400 below instead of raising
    file = request.files.get('file')
    if not file:
        return {'error': 'Missing file'}, 400

    # build a timestamped filename with a random suffix to avoid collisions
    # (uploads_dir is configured elsewhere in the application)
    now = datetime.now()
    filename = now.strftime("%Y%m%d_%H%M%S") + "_" + str(randint(0, 999))
    filepath = os.path.join(uploads_dir, secure_filename(filename + '.jpg'))
    file.save(filepath)

    # OCR the saved image
    image = cv2.imread(filepath)
    reader = Reader(['en'])
    results = reader.readtext(image)

    # collect the recognized text and its confidence scores
    text_array = []
    confidence_level = []
    for result in results:
        text_array.append(result[1])
        confidence_level.append(result[2])

    response = {
        'text': text_array,
        'confidence': confidence_level,
    }
    return jsonify({"data": response}), 200
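# Client-side sketch for exercising the upload endpoint. The URL and route
# path are assumptions: the @app.route decorator is not shown in the snippet
# above, so "/upload" and the port are hypothetical.
import requests

with open("photo.jpg", "rb") as fh:  # hypothetical local image
    resp = requests.post("http://localhost:5000/upload", files={"file": fh})
print(resp.json())  # {"data": {"text": [...], "confidence": [...]}}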
from easyocr import Reader


def get_text_areas(pixels):
    """
    EasyOCR function. Takes the pixels of an image and returns the text
    found in the picture.
    """
    reader = Reader(['fr'], gpu=False, verbose=False)
    ocr_data = reader.readtext(pixels)
    # ocr_data[0][2] is the confidence level of the result; if it is near 0,
    # it is very likely that there is no text
    try:
        if ocr_data[0][2] > 0.3:
            return ocr_data
        return []
    except IndexError:
        # readtext returned an empty list: no text detected at all
        return []
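# Usage sketch: get_text_areas takes raw pixels, so load the image with
# OpenCV first. "document.jpg" is a hypothetical path; the function returns
# an empty list when nothing is found, so the loop below is always safe.
import cv2

pixels = cv2.imread("document.jpg")
for (bbox, text, confidence) in get_text_areas(pixels):
    print(f"{confidence:.2f}: {text}")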
import asyncio
import functools
import io
import logging

from easyocr import Reader

logger = logging.getLogger(__name__)


# Printer and take_screenshot come from the surrounding application
async def read(printer: Printer):
    ocr_reader = Reader(['en'])
    screenshot = await take_screenshot(printer)

    # serialize the screenshot to PNG bytes so EasyOCR can consume it
    screenshot_io = io.BytesIO()
    screenshot.save(screenshot_io, format='PNG')

    # run the blocking readtext call in a thread pool so the event loop
    # is not blocked while OCR runs
    boxes = await asyncio.get_event_loop().run_in_executor(
        None,
        functools.partial(ocr_reader.readtext, screenshot_io.getvalue(),
                          detail=0))

    words = []
    for box in boxes:
        words += [word.strip() for word in box.split()]
    text = ' '.join(words)
    logger.info('text on screen: %s', text)
    return text
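# The key pattern in read() is offloading the blocking readtext call to a
# thread so the event loop stays responsive. A standalone sketch of the same
# pattern; "screen.png" is a hypothetical screenshot file, and readtext
# accepts raw image bytes, with detail=0 returning plain strings.
import asyncio
import functools

from easyocr import Reader


async def ocr_bytes(reader, png_bytes):
    loop = asyncio.get_running_loop()
    # run the CPU-bound OCR call in the default thread pool executor
    return await loop.run_in_executor(
        None, functools.partial(reader.readtext, png_bytes, detail=0))


async def main():
    reader = Reader(['en'])
    with open("screen.png", "rb") as fh:
        lines = await ocr_bytes(reader, fh.read())
    print(' '.join(lines))


if __name__ == "__main__":
    asyncio.run(main())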
help="comma separated list of languages to OCR") ap.add_argument("-g", "--gpu", type=int, default=-1, help="whether or not GPU should be used") args = vars(ap.parse_args()) # break the input languages into a comma separated list langs = args["langs"].split(",") print("[INFO] OCR'ing with the following languages: {}".format(langs)) image = cv2.imread(args["image"]) print("[INFO] OCR'ing input image...") reader = Reader(langs, gpu=args["gpu"] > 0) results = reader.readtext(image) for (bbox, text, prob) in results: # display the OCR'd text and associated probability print("[INFO] {:.4f}: {}".format(prob, text)) # unpack the bounding box (tl, tr, br, bl) = bbox tl = (int(tl[0]), int(tl[1])) tr = (int(tr[0]), int(tr[1])) br = (int(br[0]), int(br[1])) bl = (int(bl[0]), int(bl[1])) # cleanup the text and draw the box surrounding the text along # with the OCR'd text itself
type=str, default="en", help="comma separated list of languages to OCR") ap.add_argument("-g", "--gpu", type=int, default=-1, help="whether or not GPU should be used") args = vars(ap.parse_args()) now = datetime.now() # break the input languages into a comma separated list langs = args["langs"].split(",") use_tesseract = True print("[INFO] OCR'ing with the following languages: {}".format(langs)) reader = Reader(langs, gpu=True) detection_pipeline = [{ "function": "sharpening", "kwargs": {} }, { "function": "bilateral_filter", "kwargs": { "d": 7, "sigmaColor": 50, "sigmaSpace": 50 } }] recognition_pipeline = [{ "function": "cvt_to_gray", "kwargs": {}
"--langs", type=str, default="en", help="comma separated list of languages to OCR") ap.add_argument("-g", "--gpu", type=int, default=-1, help="whether or not GPU should be used") args = vars(ap.parse_args()) now = datetime.now() # break the input languages into a comma separated list langs = args["langs"].split(",") print("[INFO] OCR'ing with the following languages: {}".format(langs)) reader = Reader(langs, gpu=True) # load the input image from disk # detection_pipeline= [{"function":"cvt_to_gray", "kwargs":{}}, {"function":"apply_CAHE", "kwargs":{"clipLimit":2.0, "tileGridSize":(8,8)}}] # detection_pipeline= [{"function":"cvt_to_gray", "kwargs":{}}] detection_pipeline = [] # detection_pipeline= [{"function":"bilateral_filter", "kwargs":{"d":7, "sigmaColor":50, "sigmaSpace":50}}] # detection_pipeline= [{"function":"cvt_to_gray", "kwargs":{}},{"function":"bilateral_filter", "kwargs":{"d":7, "sigmaColor":50, "sigmaSpace":50}}] # detection_pipeline= [{"function":"sharpening", "kwargs":{}}] # detection_pipeline= [{"function":"sharpening", "kwargs":{}},{"function":"bilateral_filter", "kwargs":{"d":7, "sigmaColor":50, "sigmaSpace":50}}] # detection_pipeline= [{"function":"cvt_to_gray", "kwargs":{}},{"function":"sharpening", "kwargs":{}},{"function":"bilateral_filter", "kwargs":{"d":7, "sigmaColor":50, "sigmaSpace":50}}] # detection_pipeline= [{"function":"cvt_to_gray", "kwargs":{}}, {"function":"apply_CAHE", "kwargs":{"clipLimit":2.0, "tileGridSize":(8,8)}},{"function":"bilateral_filter", "kwargs":{"d":5, "sigmaColor":50, "sigmaSpace":50}}] # detection_pipeline= [{"function":"cvt_to_gray", "kwargs":{}},{"function":"otsu", "kwargs":{}}] # recognition_pipeline = [{"function":"cvt_to_gray", "kwargs":{}},{"function":"resize_image", "kwargs":{"new_height":128}}, {"function":"bilateral_filter", "kwargs":{"d":5, "sigmaColor":50, "sigmaSpace":150}}]
'''
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="path to input image to be OCR'd")
ap.add_argument("-l", "--langs", type=str, default="en",
                help="comma separated list of languages to OCR")
ap.add_argument("-g", "--gpu", type=int, default=-1,
                help="whether or not GPU should be used")
args = vars(ap.parse_args())
'''

# set the input languages and create the reader once, outside the
# per-image loop
langs = ['en']
print("[INFO] OCR'ing with the following languages: {}".format(langs))
reader = Reader(langs)

for show_image in final_images:
    # load the input image
    imageread = show_image

    # OCR the input image using EasyOCR
    print("[INFO] OCR'ing input image...")
    results = reader.readtext(imageread)

    # loop over the results
    for (bbox, text, prob) in results:
        word_list.append(text)
    print(word_list)

    # append a sentinel string to mark the boundary between images
    word_list.append("J(&?`^")

# show the accumulated word list
print(word_list)
from easyocr import Reader
import cv2


def cleanup_text(text):
    '''If a character is not in the ASCII table it cannot be drawn with
    cv2.putText, so keep only ASCII characters. strip() with no argument
    removes the default whitespace from both ends of the resulting string.'''
    return "".join([c if ord(c) < 128 else "" for c in text]).strip()


lang = ['en']
image = cv2.imread('image.jpg')
reader = Reader(lang, gpu=True)
results = reader.readtext(image)

for (bbox, text, prob) in results:
    print(f'{prob}:{text}')

    # unpack the bounding box
    (tl, tr, br, bl) = bbox
    tl = (int(tl[0]), int(tl[1]))
    tr = (int(tr[0]), int(tr[1]))
    br = (int(br[0]), int(br[1]))
    bl = (int(bl[0]), int(bl[1]))

    # append the cleaned text to the results file; the with block closes
    # the file automatically
    text = cleanup_text(text)
    with open('result.txt', 'a') as f:
        f.write(text)

cv2.imshow('image', image)
cv2.waitKey(0)
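# Quick check of cleanup_text: non-ASCII characters are dropped so that
# cv2.putText does not render garbage glyphs.
print(cleanup_text("héllo wörld"))  # prints "hllo wrld"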
# break the input languages into a comma-separated list
languages = args["langs"].split(",")
print("[INFO] OCR'ing with the following languages: {}".format(languages))

# download and decode the image if is_url is True, otherwise load the
# input image from disk
if args["is_url"]:
    image = url_to_image(args["image"])
else:
    image = cv2.imread(args["image"])

# OCR the input image using EasyOCR
print("[INFO] OCR'ing input image...")
reader = Reader(languages, gpu=args["use_gpu"])
results = reader.readtext(image)

# Note: unlike Tesseract, EasyOCR can work with OpenCV's default BGR color
# channel ordering, so we do not need to swap color channels after loading
# the image.

# loop over the results
for (bbox, text, prob) in results:
    # display the OCR'd text and associated probability
    print("[INFO] {:.4f}: {}".format(prob, text))

    # unpack the bounding box
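# The snippet above calls url_to_image without defining it. A minimal sketch
# of such a helper (an assumption, not the original author's code): download
# the bytes and decode them into an OpenCV BGR array.
import urllib.request

import cv2
import numpy as np


def url_to_image(url):
    with urllib.request.urlopen(url) as resp:
        data = np.asarray(bytearray(resp.read()), dtype=np.uint8)
    return cv2.imdecode(data, cv2.IMREAD_COLOR)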