def test_image_to_data__pandas_support(test_file_small):
    with pytest.raises(TSVNotSupported):
        image_to_data(test_file_small, output_type=Output.DATAFRAME)
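# With a recent Tesseract and the pandas package installed, Output.DATAFRAME
# returns the TSV output as a pandas DataFrame instead of raising. A minimal
# sketch (the image path is a placeholder):
import pytesseract
from pytesseract import Output

df = pytesseract.image_to_data('sample.png', output_type=Output.DATAFRAME)
print(df[df.conf > 60][['text', 'left', 'top', 'width', 'height']])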
import cv2
import pytesseract
from pytesseract import Output

img = cv2.imread('page_1.jpg')
d = pytesseract.image_to_data(img, output_type=Output.DICT)
print(d.keys())
n_boxes = len(d['text'])
for i in range(n_boxes):
    if int(d['conf'][i]) > 60:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
        img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite("C:/New folder/New Vision Soft/OCR/ITR/box_img.jpg", img)
cv2.imshow('img', img)
cv2.waitKey(0)
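# Note: depending on the pytesseract version, d['conf'] entries may be strings
# or floats, and -1 marks non-word rows, so int() can fail on values such as
# '96.33'. A version-tolerant filter, assuming d from the snippet above:
for i in range(len(d['text'])):
    conf = float(d['conf'][i])  # works for both str and numeric conf values
    if conf > 60 and d['text'][i].strip():
        print(d['text'][i], conf)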
# Imports needed by this excerpt (VideoStream comes from the imutils package).
import argparse
import os
import re
import time

import cv2
import numpy as np
import pytesseract
from imutils.video import VideoStream
from pytesseract import Output
from scipy import ndimage


def camIdMotor():
    # BINARY_THREHOLD, highthreshold, lowthreshold, weak_pixel and strong_pixel
    # are module-level tuning constants assumed to be defined elsewhere.
    font = cv2.FONT_HERSHEY_SIMPLEX

    def image_smoothening(img):
        ret1, th1 = cv2.threshold(img, BINARY_THREHOLD, 255, cv2.THRESH_BINARY)
        ret2, th2 = cv2.threshold(th1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        blur = cv2.GaussianBlur(th2, (1, 1), 0)
        ret3, th3 = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return th3

    def remove_noise_and_smooth(img):
        print('Removing noise and smoothening image')
        # img = cv2.imread(file_name, 0)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        filtered = cv2.adaptiveThreshold(gray.astype(np.uint8), 255,
                                         cv2.ADAPTIVE_THRESH_MEAN_C,
                                         cv2.THRESH_BINARY, 41, 3)
        kernel = np.ones((1, 1), np.uint8)
        opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
        img = image_smoothening(gray)
        or_image = cv2.bitwise_or(gray, closing)
        return or_image

    def gaussian_kernel(size, sigma=1):
        size = int(size) // 2
        x, y = np.mgrid[-size:size + 1, -size:size + 1]
        normal = 1 / (2.0 * np.pi * sigma ** 2)
        g = np.exp(-((x ** 2 + y ** 2) / (2.0 * sigma ** 2))) * normal
        return g

    def sobel_filters(img):
        Kx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], np.float32)
        Ky = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]], np.float32)
        Ix = ndimage.filters.convolve(img, Kx)
        Iy = ndimage.filters.convolve(img, Ky)
        G = np.hypot(Ix, Iy)
        G = G / G.max() * 255
        theta = np.arctan2(Iy, Ix)
        return (G, theta)

    def non_max_suppression(img, D):
        M, N = img.shape
        Z = np.zeros((M, N), dtype=np.int32)
        angle = D * 180. / np.pi
        angle[angle < 0] += 180
        for i in range(1, M - 1):
            for j in range(1, N - 1):
                try:
                    q = 255
                    r = 255
                    # angle 0
                    if (0 <= angle[i, j] < 22.5) or (157.5 <= angle[i, j] <= 180):
                        q = img[i, j + 1]
                        r = img[i, j - 1]
                    # angle 45
                    elif 22.5 <= angle[i, j] < 67.5:
                        q = img[i + 1, j - 1]
                        r = img[i - 1, j + 1]
                    # angle 90
                    elif 67.5 <= angle[i, j] < 112.5:
                        q = img[i + 1, j]
                        r = img[i - 1, j]
                    # angle 135
                    elif 112.5 <= angle[i, j] < 157.5:
                        q = img[i - 1, j - 1]
                        r = img[i + 1, j + 1]
                    if (img[i, j] >= q) and (img[i, j] >= r):
                        Z[i, j] = img[i, j]
                    else:
                        Z[i, j] = 0
                except IndexError:
                    pass
        return Z

    def threshold(img):
        highThreshold = img.max() * highthreshold
        lowThreshold = highThreshold * lowthreshold
        M, N = img.shape
        res = np.zeros((M, N), dtype=np.int32)
        weak = np.int32(weak_pixel)
        strong = np.int32(strong_pixel)
        strong_i, strong_j = np.where(img >= highThreshold)
        zeros_i, zeros_j = np.where(img < lowThreshold)
        weak_i, weak_j = np.where((img <= highThreshold) & (img >= lowThreshold))
        res[strong_i, strong_j] = strong
        res[weak_i, weak_j] = weak
        return res

    def hysteresis(img):
        M, N = img.shape
        weak = weak_pixel
        strong = strong_pixel
        for i in range(1, M - 1):
            for j in range(1, N - 1):
                if img[i, j] == weak:
                    try:
                        if ((img[i + 1, j - 1] == strong) or (img[i + 1, j] == strong)
                                or (img[i + 1, j + 1] == strong) or (img[i, j - 1] == strong)
                                or (img[i, j + 1] == strong) or (img[i - 1, j - 1] == strong)
                                or (img[i - 1, j] == strong) or (img[i - 1, j + 1] == strong)):
                            img[i, j] = strong
                        else:
                            img[i, j] = 0
                    except IndexError:
                        pass
        return img

    def detectarPlaca(img):
        ## TESTING
        # blurred_image = gaussian_blur(image, kernel_size=9, verbose=False)
        # img_smoothed = convolve(img, gaussian_kernel(kernel_size, sigma))
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # gradientMat, thetaMat = sobel_filters(img_smoothed)
        # nonMaxImg = non_max_suppression(gradientMat, thetaMat)
        # thresholdImg = threshold(nonMaxImg)
        # img_final = hysteresis(thresholdImg)
        # imgNoise = remove_noise_and_smooth(img)
        # grayNew = unsharp_mask(img)
        # im_resized(img, dpi=(300, 300))  # best for OCR
        # img = cv2.resize(img, (300, 300))
        # img = cv2.medianBlur(img, 5)
        # gray = cv2.blur(gray, (5, 5))
        # gray = cv2.medianBlur(gray, 5)
        # gray = img.gaussian_kernel(5)
        gray = cv2.GaussianBlur(gray, (5, 5), 0)
        # gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        # ret, thresh1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
        # ret, thresh2 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
        # ret, thresh3 = cv2.threshold(gray, 127, 255, cv2.THRESH_TRUNC)
        # ret, thresh4 = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
        # ret, thresh5 = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO_INV)
        ## OTHER
        # th2 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
        # th3 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
        # ret3, th3 = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        canny = cv2.Canny(gray, 50, 150)
        # kernel = np.ones((5, 5), np.uint8)
        canny = cv2.dilate(canny, None, iterations=2)
        # canny = cv2.erode(canny, None, iterations=1)
        kernel = np.ones((5, 5), np.uint8)
        # canny = cv2.morphologyEx(canny, cv2.MORPH_CLOSE, None)
        # canny = cv2.morphologyEx(canny, cv2.MORPH_GRADIENT, None)
        # canny = cv2.dilate(canny, None, iterations=2)
        # canny = cv2.bitwise_not(canny)
        # canny = cv2.erode(canny, kernel, iterations=1)
        return canny

    def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0):
        """Return a sharpened version of the image, using an unsharp mask."""
        blurred = cv2.GaussianBlur(image, kernel_size, sigma)
        sharpened = float(amount + 1) * image - float(amount) * blurred
        sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))
        sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))
        sharpened = sharpened.round().astype(np.uint8)
        if threshold > 0:
            low_contrast_mask = np.absolute(image - blurred) < threshold
            np.copyto(sharpened, image, where=low_contrast_mask)
        return sharpened

    def binarizacion(img):
        # img = cv2.imread('gradient.png', 0)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        ret, thresh1 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
        ret, thresh2 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
        ret, thresh3 = cv2.threshold(img, 127, 255, cv2.THRESH_TRUNC)
        ret, thresh4 = cv2.threshold(img, 127, 255, cv2.THRESH_TOZERO)
        ret, thresh5 = cv2.threshold(img, 127, 255, cv2.THRESH_TOZERO_INV)
        # titles = ['Original Image', 'BINARY', 'BINARY_INV', 'TRUNC', 'TOZERO', 'TOZERO_INV']
        # images = [img, thresh1, thresh2, thresh3, thresh4, thresh5]
        return thresh5

    def conTornos(src):
        # gris = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        # Apply Gaussian smoothing
        # gauss = cv2.GaussianBlur(gris, (5, 5), 0)
        # canny = cv2.Canny(gauss, 50, 150)
        # imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # ret, thresh = cv2.threshold(imgray, 127, 255, 0)
        # im2, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        # gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        # gray = cv2.GaussianBlur(gray, (7, 7), 3)
        # t, dst = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_TRIANGLE)
        # gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        # _, th = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
        # canny = cv2.Canny(th, 50, 150)
        # img1, contornos1, hierarchy1 = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        # img2, contornos2, hierarchy2 = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # contornos1, hierarchy1 = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        # gray = cv2.blur(gray, (3, 3))
        gray = cv2.bilateralFilter(gray, 11, 17, 17)  # Blur to reduce noise
        th3 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY, 11, 2)
        canny = cv2.Canny(gray, 150, 200)
        # canny = cv2.dilate(canny, None, iterations=1)
        canny = cv2.morphologyEx(canny, cv2.MORPH_OPEN, None)
        return canny

    def maskNew2(img):
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # grayscale
        _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)  # threshold
        kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
        dilated = cv2.dilate(thresh, kernel, iterations=13)  # dilate
        return dilated

    def maskNew1(img):
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # canny = cv2.Canny(gray_img, 100, 200)
        # gray_img = canny
        # th3 = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
        # global thresholding
        ret1, th1 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY)
        # Otsu's thresholding
        ret2, th2 = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # Otsu's thresholding after Gaussian filtering
        blur = cv2.GaussianBlur(gray_img, (5, 5), 0)
        ret3, th3 = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        ## other
        ret, thresh1 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY)
        ret, thresh2 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY_INV)
        ret, thresh3 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TRUNC)
        ret, thresh4 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TOZERO)
        ret, thresh5 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TOZERO_INV)
        return th2

    def rescalingNew(imgNew):
        img = cv2.resize(imgNew, None, fx=1.2, fy=1.2, interpolation=cv2.INTER_CUBIC)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # kernel = np.ones((1, 1), np.uint8)
        # img = cv2.dilate(img, kernel, iterations=1)
        # img = cv2.erode(img, kernel, iterations=1)
        # cv2.threshold(cv2.bilateralFilter(img, 5, 75, 75), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        # cv2.threshold(cv2.medianBlur(img, 3), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        # cv2.adaptiveThreshold(cv2.bilateralFilter(img, 9, 75, 75), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        #                       cv2.THRESH_BINARY, 31, 2)
        cv2.adaptiveThreshold(cv2.medianBlur(img, 3), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                              cv2.THRESH_BINARY, 31, 2)
        return img

    def maskNew(img):
        frame = cv2.resize(img, (400, 300), interpolation=cv2.INTER_CUBIC)
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        lowerBlue = np.array([100, 50, 50])
        upperBlue = np.array([130, 255, 255])
        # cv2.adaptiveThreshold(cv2.medianBlur(img, 3), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
        mask = cv2.inRange(hsv, lowerBlue, upperBlue)
        return mask

    def detectaCode(img):
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_img = cv2.bitwise_not(gray_img)
        return gray_img

    def umbralChangeBack(img):
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        img_contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
        img_contours = sorted(img_contours, key=cv2.contourArea)
        for i in img_contours:
            if cv2.contourArea(i) > 100:
                break
        mask = np.zeros(img.shape[:2], np.uint8)
        cv2.drawContours(mask, [i], -1, 255, -1)
        new_img = cv2.bitwise_and(img, img, mask=mask)
        return new_img

    def contrast(img):
        return cv2.addWeighted(img, 1.5, np.zeros(img.shape, img.dtype), 0, 0)

    def focusing(val):
        value = (val << 4) & 0x3ff0
        data1 = (value >> 8) & 0x3f
        data2 = value & 0xf0
        os.system("i2cset -y 0 0x0c %d %d" % (data1, data2))

    def sobel(img):
        img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        img_sobel = cv2.Sobel(img_gray, cv2.CV_16U, 1, 1)
        return cv2.mean(img_sobel)[0]

    def laplacian(img):
        img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        img_sobel = cv2.Laplacian(img_gray, cv2.CV_16U)
        return cv2.mean(img_sobel)[0]

    # get grayscale image
    def get_grayscale(image):
        img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        ret, thresh2 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
        # Apply Gaussian smoothing
        gauss = cv2.GaussianBlur(thresh2, (5, 5), 0)
        # canny = cv2.Canny(gauss, 100, 200)
        # canny = auto_canny(img)
        return gauss

    def get_grayscaleNEW(image):
        return cv2.bilateralFilter(image, 11, 17, 17)

    # noise removal
    def remove_noise(image):
        return cv2.medianBlur(image, 5)

    # thresholding
    def thresholding(image):
        return cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    def thresHolding(img):
        return cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

    # dilation
    def dilate(image):
        kernel = np.ones((5, 5), np.uint8)
        return cv2.dilate(image, kernel, iterations=1)

    # erosion
    def erode(image):
        kernel = np.ones((5, 5), np.uint8)
        return cv2.erode(image, kernel, iterations=1)

    # opening - erosion followed by dilation
    def opening(image):
        kernel = np.ones((5, 5), np.uint8)
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

    # canny edge detection
    def canny(image):
        return cv2.Canny(image, 100, 200)

    def auto_canny(image, sigma=0.33):
        # compute the median of the single channel pixel intensities
        v = np.median(image)
        # apply automatic Canny edge detection using the computed median
        lower = int(max(0, (1.0 - sigma) * v))
        upper = int(min(255, (1.0 + sigma) * v))
        edged = cv2.Canny(image, lower, upper)
        # return the edged image
        return edged

    # skew correction
    def deskew(image):
        coords = np.column_stack(np.where(image > 0))
        angle = cv2.minAreaRect(coords)[-1]
        if angle < -45:
            angle = -(90 + angle)
        else:
            angle = -angle
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC,
                                 borderMode=cv2.BORDER_REPLICATE)
        return rotated

    # template matching
    def match_template(image, template):
        return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

    ap = argparse.ArgumentParser()
    ap.add_argument("-v", "--video", help="path to the (optional) video file")
    args = vars(ap.parse_args())
    # if the video path was not supplied, grab the reference to the camera
    if not args.get("video", False):
        vs = VideoStream(src=0).start()
        # vs = cv2.VideoCapture(0)
        time.sleep(2.0)
    # otherwise, load the video
    else:
        vs = cv2.VideoCapture(args["video"])
    findText = None  # ensure it is defined even if no text row ever matches
    # keep looping over the frames
    while True:
        # check to see if we have reached the end of the video
        # time.sleep(2.0)
        frame = vs.read()
        frame = frame[1] if args.get("video", False) else frame
        if frame is None:
            break
        # length = int(vs.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT))
        # width = int(vs.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH))
        # height = int(vs.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT))
        # fps = vs.get(cv2.cv.CV_CAP_PROP_FPS)
        # grab the current frame and then handle if the frame is returned
        # from either the 'VideoCapture' or 'VideoStream' object, respectively
        # frame = thresholding(frame)
        # frame = opening(frame)
        # frame = canny(frame)
        # frame = canny(frameOriginal)
        # frameModificado = get_grayscale(frame)
        # frameModificado = binarizacion(frame)
        frameModificado = detectarPlaca(frame)
        # frame = image_smoothening(frame)
        # frame = auto_canny(frame)
        # frame = sobel(frame)
        # frame = laplacian(frame)
        # frame = umbralChangeBack(frame)
        # frame = detectaCode(frame)
        # frame = get_grayscale(frame)
        # frame = thresHolding(frame)
        # frame = maskNew1(frame)
        # frame = contrast(frame)
        # frame = conTornos(frame)
        # frame = maskNew1(frame)
        # frame = rescalingNew(frame)
        # contornos1, hierarchy1 = cv2.findContours(frameOriginal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        cnts, _ = cv2.findContours(frameModificado, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 4
        # print('Number of contours found: ', len(cnts))
        for c in cnts:
            area = cv2.contourArea(c)
            if area > 1500 and area < 6000:
                # print(area)
                # nuevoContorno = cv2.convexHull(c)
                perimeter = cv2.arcLength(c, True)
                epsilon = 0.1 * cv2.arcLength(c, True)
                approx = cv2.approxPolyDP(c, epsilon, True)
                hull = cv2.convexHull(c)
                M = cv2.moments(c)
                if M["m00"] == 0:
                    M["m00"] = 1
                x = int(M["m10"] / M["m00"])
                y = int(M['m01'] / M['m00'])
                cv2.drawContours(frame, [approx], 0, (255, 0, 0), 3)
                # cv2.drawContours(frame, c, -1, (0, 0, 255), 3)
                # Draw the centre
                cv2.circle(frame, (x, y), 3, (0, 0, 255), -1)
                # cv2.drawContours(frame, cnts, -1, (0, 255, 0), 2)
                # epsilon = 0.01 * cv2.arcLength(c, True)
                # approx = cv2.approxPolyDP(c, epsilon, True)
                # print(len(approx))
                # x, y, w, h = cv2.boundingRect(approx)
                # print('AREA', x, y, w, h)
                # placa = frameModificado[y:y + h, x:x + w]
                # textNew = pytesseract.image_to_string(placa, config='--psm 11')
                # print('PLACA: ', textNew)
                # textCycle = pytesseract.image_to_string([nuevoContorno], config='--psm 11')
                # print(textCycle)
        custom_oem_psm_config = r'--oem 3 --psm 6'
        # config = '--psm 11'
        d = pytesseract.image_to_data(frameModificado, output_type=Output.DICT,
                                      config=custom_oem_psm_config)
        # print(d.keys())
        print(d['text'])
        ## EXAMPLE
        text = pytesseract.image_to_string(frameModificado, config=custom_oem_psm_config)
        # print("Detected Number is:", text)
        x = 0
        y = 0
        n_boxes = len(d['text'])
        for i in range(n_boxes):
            if int(d['conf'][i]) > 60:
                (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
                frameModificado = cv2.rectangle(frameModificado, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frameModificado, '{}'.format(d['text'][i]), (x - 60, y + 160),
                            font, 1.00, (0, 0, 0), 2, cv2.LINE_AA)
        cv2.imshow('Frame', frame)
        cv2.imshow('Frame_Modificado', frameModificado)
        # cv2.imshow("Frame_Original", frameOriginal)
        # Exit with 'ESC'
        patronNew = re.compile('^FKI[0-9][0-9][0-9][0-9][0-9]')
        tempLength = d['text']
        print(len(d['text']))
        for t in d['text']:
            findText = re.search(patronNew, t, flags=0)
            # print("Iteration: " + t)
            if findText:
                print("Detected Number is:", findText.group(0))
                break
        # findText = re.search(patronNew, text, flags=0)
        # print(findText.group(0))
        if findText:
            # print("Detected Number is:", findText.group(0))
            break
        k = cv2.waitKey(5) & 0xFF
        if k == 27:
            break
    # if we are not using a video file, stop the video file stream
    if not args.get("video", False):
        vs.stop()
    # otherwise, release the camera pointer
    else:
        vs.release()
    # close all windows
    cv2.destroyAllWindows()
    # waitTime = findText.group(0)
    # print('waitTime: ' + waitTime)
    return findText.group(0)
titles = ['Original Image', 'BINARY', 'BINARY_INV', 'TRUNC', 'TOZERO', 'TOZERO_INV']
images = [img, thresh1, thresh2, thresh3, thresh4, thresh5]
for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.imshow(images[i], 'gray', vmin=0, vmax=255)
    plt.title(titles[i])
    plt.xticks([]), plt.yticks([])
plt.show()

custom_config = r'--oem 3 --psm 6 -l kor+eng'
words = pytesseract.image_to_data(thresh4, config=custom_config, output_type=Output.DICT)  ## pytesseract
n_boxes = len(words['text'])
print(n_boxes)
for i in range(n_boxes):
    if int(words['conf'][i]) > 60:
        (x, y, w, h) = (words['left'][i], words['top'][i], words['width'][i], words['height'][i])
        cv.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 1)
        cv.putText(img, words['text'][i], (x, y), cv.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1)
        img_crop = img[y:y + h, x:x + w]
        cv.imshow('img_crop', img_crop)
cv.imshow('Resource', img)
cv.waitKey(0)
def __init__(self, dir_to_find=None, oem=None, psm=None, parent=None, filter_image_selected=None):
    from os import scandir, path

    # App message hook
    if parent is None:
        self.parent = self
        self.parent.info = self.info

    # Check that the Tesseract EXE exists and set its path
    exe_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    if not path.exists(exe_tesseract):
        self.parent.info('File does not exist: ' + exe_tesseract)
        return
    pytesseract.pytesseract.tesseract_cmd = exe_tesseract

    # Add custom options
    if oem is not None and psm is not None:
        custom_config = r'--oem ' + str(oem) + ' --psm ' + str(psm)
    elif oem is not None:
        custom_config = r'--oem ' + str(oem)
    elif psm is not None:
        custom_config = r'--psm ' + str(psm)
    else:
        custom_config = r'--oem 3'

    # Select photo directory
    if dir_to_find is None:
        self.select_directory()
    else:
        self.dir_to_find = dir_to_find

    result = {}
    correct = 0
    incorrect = 0

    # Create a TXT file with the results
    file_txt = open(path.join(self.dir_to_find, 'Result_OCR_Scan.txt'), 'w')
    file_txt.write('Configuration ' + custom_config + chr(10))
    file_txt.write('Filter ' + filter_image_selected + chr(10) + chr(10))
    for file in scandir(self.dir_to_find):
        # Extract file name and extension separately
        name, ext = path.splitext(file.name)
        # Find coordinates from a picture file
        if ext.upper() in ('.JPG', '.JPEG', '.PNG'):
            find = False
            incorrect += 1
            day = ' Not found'
            hour = ''
            lat = ''
            lon = ''
            # Filter the image for a good reading
            self.filter_image(file.path, filter_image_selected)
            read = pytesseract.image_to_data(self.img, output_type=Output.DICT)
            for i in range(len(read['text'])):
                if read['text'][i] == 'GARMIN':
                    find = True
                    day = read['text'][i + 1]
                    hour = read['text'][i + 2]
                    lat = float(read['text'][i + 3])
                    lon = float(read['text'][i + 4])
                    self.geotag_photo(file.path, lat, lon)
                    incorrect -= 1
                    correct += 1
                    break
            result[file.path] = {'day': day, 'hour': hour, 'lat': lat, 'lon': lon, 'find': find}
            text_result = file.path + " " + day + " " + hour + " " + str(lat) + " " + str(lon)
            file_txt.write(text_result + chr(10))
            self.parent.info(text_result)
    # self.parent.info(result)
    self.parent.info('Geotagged    : ' + str(correct))
    self.parent.info('Not geotagged: ' + str(incorrect))
    file_txt.write('Geotagged    : ' + str(correct) + chr(10))
    file_txt.write('Not geotagged: ' + str(incorrect) + chr(10))
    file_txt.close()
def imgtodata(i):
    j = pytesseract.image_to_data(i, output_type=Output.DICT)
    # print(j.keys())
    k = len(j['text'])
    return j, k
""" Created on Tue Mar 3 16:32:52 2020 @author: Namita Simple program to detect text and boxing it using pytessersct Usage : python box.py """ import cv2 import pytesseract from pytesseract import Output img_cv = cv2.imread('image.jpg') # read image using image path img = cv2.cvtColor( img_cv, cv2.COLOR_BGR2RGB) # Because pytesseract works good on grayscale d = pytesseract.image_to_data( img, output_type=Output.DICT) #convert image to data object # print(pytesseract.image_to_boxes(img)) # print(d['text']) n_boxes = len(d['text']) for i in range(n_boxes): if int(d['conf'][i]) > 60: (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i]) img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2) cv2.imshow('IMG.png', img) #display image cv2.waitKey( 0 ) #wait key zero means manual closing the window, You can add time in mils
from numpy import asarray

# ap = argparse.ArgumentParser()
# ap.add_argument("-i", "--testID.jpg", help="path to input image to be OCR'd", default="stdout")
# ap.add_argument("-o", "--output", help="path to the output CSV file")
# ap.add_argument("-c", "--min conf", help="minimum confidence value to filter weak text detection")
# ap.add_argument("-d", "--dist-thresh", type=float, default=25.0)
# ap.add_argument("-s", "--min-size", type=int, default=2, help="minimum cluster size")
# args = vars(ap.parse_args())
# np.random.seed(42)

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
custom_config = r'--oem 3 --psm 11'
images = cv2.imread(r"C:\Users\King alagbe\Downloads\testID.jpg")
images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB)
height, width, t = images.shape
boxes = pytesseract.image_to_data(images, config=custom_config)
print(boxes)
for x, b in enumerate(boxes.splitlines()):
    if x != 0:  # skip the TSV header row
        # print(b)
        b = b.split()
        print(b)
        if len(b) == 12:  # rows with 12 fields carry recognized text
            x, y, w, h = int(b[6]), int(b[7]), int(b[8]), int(b[9])
            cv2.rectangle(images, (x, y), (w + x, h + y), (225, 0, 0), 2)
            cv2.putText(images, b[11], (x, y), cv2.FONT_HERSHEY_COMPLEX, 0.3, (50, 50, 255), 1)
cv2.imshow("Result", images)
cv2.waitKey(0)
def bot():
    # get window and rect
    window = windows[index]
    rect = windowRects[index]
    # separate x, y, w, h of rect
    x = rect[0]
    y = rect[1]
    w = rect[2]
    h = rect[3]
    # create bounding box: top and height are cut
    bounding_box = (x, y + 100, w, h - 160)
    waittime = 0
    try:
        while True:
            if keyboard.is_pressed("q"):
                break
            windowimg = ImageGrab.grab(bbox=bounding_box)
            # convert to mode 'L', i.e. just black and white
            windowimg = windowimg.convert('L')
            # convert image to numpy array to work with pixels
            img = np.array(windowimg)
            # apply threshold, just black and white
            ret, img = cv2.threshold(img, 240, 255, cv2.THRESH_BINARY)
            d = pytesseract.image_to_data(img, output_type='data.frame')
            cv2.imshow('screen', img)
            if (cv2.waitKey(1) & 0xFF) == ord('q'):
                cv2.destroyAllWindows()
                break
            # filter the results of tesseract for flyff
            # these filters aren't perfect yet
            d = d[d['text'].apply(lambda x: isinstance(x, str))]
            d = d[(d['conf'] != -1) & (d['conf'] > 2) & (d['text'] != "") &
                  (d['text'] != " ") & (d['text'] != "NaN") &
                  (d['text'].apply(lambda x: len(x) > 2))]
            # text in image is found
            if d.shape[0] > 0:
                # get row of found text
                row = d.iloc[0]
                # + 20 because x + row['left'] is the top-left x of the text
                monsterx = x + row['left'] + 20
                # + 5 because y + row['top'] is the top-left y of the text
                # + 100 because we did + 100 at the bounding box earlier
                monstery = y + row['top'] + 100 + 5
                print("Found Monster in Window: " + str(window) +
                      " at x: " + str(monsterx) + " at y: " + str(monstery))
                # click
                click(monsterx, monstery)
                # spellcast
                keyboard.press_and_release("c")
            # general wait time for next analysis of image
            time.sleep(0.1)
    except KeyboardInterrupt:
        exit()
def inventory_item_full_info(slot_i, slot_pos, slot_size, ss_path):
    """
    Returns a tuple with this format: (slot_i, item_name, amount) -> (int, str, int).
    It uses the item_amount function.

    :param slot_i: <int> Slot index
    :param slot_pos: <tuple> Format (X, Y)
    :param slot_size: <int> Slot size (needed for item_amount screenshots)
    :param ss_path: <str> Path to save screenshots (it spams them). Example: "./screenshots/"
    """
    max_length = 400
    slot_info = (slot_i, "null", "null")  # Won't be changed if no items detected
    pydirectinput.moveTo(slot_pos[0], slot_pos[1])
    screenshot = pyautogui.screenshot(f"{ss_path}slot_{slot_i}.png",
                                      region=(slot_pos[0] + 17, slot_pos[1] - 30, max_length, 24))  # Takes screenshot
    pixel = screenshot.getpixel((1, 0))  # First top-left pixel indicates if we found an item
    # Item names are surrounded by a pink/black box that we use to detect one
    if (25 <= pixel[0] <= 45) and (0 <= pixel[1] <= 15) and (80 <= pixel[2] <= 100):
        # More or less close to the (35, 0, 90) that we're looking for
        # If we entered here, an item has been detected
        width = max_length - 1
        while width >= 0:
            new_px = screenshot.getpixel((width, 0))
            if (25 <= new_px[0] <= 45) and (0 <= new_px[1] <= 15) and (80 <= new_px[2] <= 100):
                # Means we reached the box around the item name, no extra space on the right anymore
                break
            width -= 10
        print(width)
        screenshot = pyautogui.screenshot(f"{ss_path}slot_{slot_i}.png",
                                          region=(slot_pos[0] + 17, slot_pos[1] - 30, width, 24))  # New updated screenshot
        # Better OCR as a result of cropping the image content
        content = pytesseract.image_to_data(screenshot)  # image_to_data separates all text found into words
        item_name = ''
        for x, bx in enumerate(content.splitlines()):  # Each line holds different data
            if x != 0:  # First line is a different format and holds no data we need
                bx = bx.split()
                if len(bx) == 12:  # len is 12 when a word is found
                    item_name += f'{bx[11]} '  # word held at index 11 (the last one)
        pyautogui.moveTo(64, 103)  # Outside of inventory so as not to block the item screenshot
        amount = item_amount(slot_pos, slot_size, f'slot_{slot_i}_amount.png', ss_path=ss_path)
        # Calls item_amount to get the amount in this slot
        slot_info = (slot_i, item_name, amount)
    return slot_info
import mysql.connector
import sys
from gtts import gTTS
from PIL import Image
from pytesseract import image_to_string
from pytesseract import image_to_boxes
from pytesseract import image_to_osd
from pytesseract import image_to_data
from pytesseract import image_to_pdf_or_hocr
# from pytesseract import osd_to_dict
# from pytesseract import run_tesseract
# from pytesseract import run_and_get_output

img = Image.open("/home/san/aaa/example_03.png")
text = image_to_data(img)
print(text)
text = image_to_boxes(img)
print(text)
# ======================================
# db = mysql.connector.connect(user="******", password="******", host="localhost", database="dsaa")
# mycursor = db.cursor()
# print("""
# +=========================================+
# |     Please select page no. of Book      |
# +=========================================+
# """)
def processImage():
    readImage = cv2.imread('screenshot.png')
    imageHeight, imageWidth = readImage.shape[:2]
    lowerWidthBound = int(.29 * imageWidth)     # 555px / 1920px
    upperWidthBound = int(.375 * imageWidth)    # 720px / 1920px
    lowerHeightBound = int(.435 * imageHeight)  # 470px / 1080px
    upperHeightBound = int(.57 * imageHeight)   # 615px / 1080px
    # Crop and block out colors on the image
    croppedImage = readImage[lowerHeightBound:upperHeightBound, lowerWidthBound:upperWidthBound]
    lowerColorBound = np.array([27, 27, 27], dtype="uint16")
    upperColorBound = np.array([50, 50, 50], dtype="uint16")
    colorMask = cv2.inRange(croppedImage, lowerColorBound, upperColorBound)
    # Find text contours
    imageContours, imageHierarchy = cv2.findContours(colorMask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Create lists to hold our Tesseract data
    possibleChars = []
    possibleCharsData = []
    # Process everything that may be text
    for possibleText in imageContours:
        # Create a bounding rectangle around the contour [x, y, width, height]
        [x, y, width, height] = cv2.boundingRect(possibleText)
        # Draw rectangles around the contours on the original image
        cv2.rectangle(colorMask, (x, y), (x + width, y + height), (255, 0, 255), 0)
        # Create our region of interest
        regionOfInterest = colorMask[y:(y + height), x:(x + width)]
        # Enlarge the region
        enlargedImage = cv2.copyMakeBorder(regionOfInterest, 10, 10, 10, 10,
                                           cv2.BORDER_CONSTANT, value=[255, 255, 255])
        # Run Tesseract OCR against the image to try and find the most likely keystroke
        possibleChars.append(pytesseract.image_to_string(
            regionOfInterest, lang='eng',
            config='--psm 10 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz'))
        possibleCharsData.append(pytesseract.image_to_data(
            regionOfInterest, lang='eng',
            config='--psm 10 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz'))
    # Return all of the possible characters for the AFK key
    return possibleChars, possibleCharsData
import pytesseract
from PIL import Image
import cv2

print(pytesseract.image_to_data(Image.open('./img/np1.jpeg')))
print(pytesseract.image_to_data(Image.open('./img/np2.jpeg')))
img = cv2.imread('./img/np1.jpeg')
print(pytesseract.image_to_string(img))
box = cv2.boxPoints(rect)
box = np.int0(box)
# check for 90 degrees rotation
if (width / float(height)) > 1:
    # crop a particular rectangle from the source image
    cropped_image = cv2.getRectSubPix(carsample_mask, (width, height), centre)
else:
    # crop a particular rectangle from the source image
    cropped_image = cv2.getRectSubPix(carsample_mask, (height, width), centre)
# convert into grayscale
cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
# equalize the histogram
cropped_image = cv2.equalizeHist(cropped_image)
# resize to 260 cols and 180 rows (just something set as standard here)
cropped_image = cv2.resize(cropped_image, (260, 180))
cropped_images.append(cropped_image)

_ = plt.subplots_adjust(hspace=0.000)
number_of_subplots = len(cropped_images)
for i, v in enumerate(range(number_of_subplots)):
    v = v + 1
    # ax1 = plt.subplot(number_of_subplots, 1, v)
    showfig(cropped_images[i], plt.get_cmap('gray'))
plt.show()

for img in cropped_images:
    data = pytesseract.image_to_data(Image.fromarray(img), output_type='dict')
    print(data)
def ocr_img(img: np.array, input_file: str, search_str: str,
            highlight_readable_text: bool = False, action: str = 'Highlight',
            show_comparison: bool = False, generate_output: bool = True):
    """Scans an image buffer or an image file.
    Pre-processes the image.
    Calls the Tesseract engine with pre-defined parameters.
    Calculates the confidence score of the grabbed image content.
    Draws a green rectangle around readable text items having a confidence score > 30.
    Searches for a specific text.
    Highlights or redacts found matches of the searched text.
    Displays a window showing readable text fields or the highlighted or redacted text.
    Generates the text content of the image.
    Prints a summary to the console."""
    # If an image source file is passed as a parameter
    if input_file:
        # Read the image using OpenCV
        img = cv2.imread(input_file)
    # Preserve a copy of this image for comparison purposes
    initial_img = img.copy()
    highlighted_img = img.copy()
    # Convert image to binary
    bin_img = convert_img2bin(img)
    # Calling Tesseract -- configuration parameters:
    # oem --> OCR engine mode = 3 >> Legacy + LSTM (the LSTM neural net mode works best)
    # psm --> page segmentation mode = 6 >> Assume a single uniform block of text
    config_param = r'--oem 3 --psm 6'
    # Feed the image to Tesseract
    details = pytesseract.image_to_data(bin_img, output_type=Output.DICT,
                                        config=config_param, lang='eng')
    # The details dictionary contains the information of the input image,
    # such as detected text, region, position, height, width, and confidence score.
    ss_confidence = calculate_ss_confidence(details)
    boxed_img = None
    # Total readable items
    ss_readable_items = 0
    # Total matches found
    ss_matches = 0
    for seq in range(len(details['text'])):
        # Consider only text fields with confidence score > 30 (text is readable)
        if float(details['conf'][seq]) > 30.0:
            ss_readable_items += 1
            # Draw a green rectangle around readable text items
            if highlight_readable_text:
                (x, y, w, h) = (details['left'][seq], details['top'][seq],
                                details['width'][seq], details['height'][seq])
                boxed_img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # Search for the string
            if search_str:
                results = re.findall(search_str, details['text'][seq], re.IGNORECASE)
                for result in results:
                    ss_matches += 1
                    if action:
                        # Draw a red rectangle around the matched text
                        (x, y, w, h) = (details['left'][seq], details['top'][seq],
                                        details['width'][seq], details['height'][seq])
                        # Starting coordinate: the top-left corner of the rectangle
                        start_point = (x, y)
                        # Ending coordinate: the bottom-right corner of the rectangle
                        end_point = (x + w, y + h)
                        # Color in BGR -- Blue, Green, Red
                        if action == "Highlight":
                            color = (0, 255, 255)  # Yellow
                        elif action == "Redact":
                            color = (0, 0, 0)  # Black
                        # Thickness in px (-1 fills the entire shape)
                        thickness = -1
                        boxed_img = cv2.rectangle(img, start_point, end_point, color, thickness)
    if ss_readable_items > 0 and highlight_readable_text and not (
            ss_matches > 0 and action in ("Highlight", "Redact")):
        highlighted_img = boxed_img.copy()
    # Highlight found matches of the search string
    if ss_matches > 0 and action == "Highlight":
        cv2.addWeighted(boxed_img, 0.4, highlighted_img, 1 - 0.4, 0, highlighted_img)
    # Redact found matches of the search string
    elif ss_matches > 0 and action == "Redact":
        highlighted_img = boxed_img.copy()
        # cv2.addWeighted(boxed_img, 1, highlighted_img, 0, 0, highlighted_img)
    # Save the image
    cv2.imwrite("highlighted-text-image.jpg", highlighted_img)
    # Display a window showing readable text fields or the highlighted or redacted data
    if show_comparison and (highlight_readable_text or action):
        title = input_file if input_file else 'Compare'
        conc_img = cv2.hconcat([initial_img, highlighted_img])
        display_img(title, conc_img)
    # Generate the text content of the image
    output_data = None
    if generate_output and details:
        output_data = generate_ss_text(details)
    # Print a summary to the console
    if input_file:
        summary = {
            "File": input_file,
            "Total readable words": ss_readable_items,
            "Total matches": ss_matches,
            "Confidence score": ss_confidence
        }
        print("## Summary ########################################################")
        print("\n".join("{}:{}".format(i, j) for i, j in summary.items()))
        print("###################################################################")
    return highlighted_img, ss_readable_items, ss_matches, ss_confidence, output_data
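# A hypothetical invocation of ocr_img (the file name and search pattern are
# placeholders; convert_img2bin, calculate_ss_confidence, generate_ss_text and
# display_img are assumed to be defined elsewhere in the same module):
result_img, words, matches, confidence, text = ocr_img(
    img=None, input_file='invoice.jpg', search_str=r'\d{4}-\d{4}',
    highlight_readable_text=True, action='Redact', show_comparison=False)
print(words, matches, confidence)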
def ocr(self, psm: int = 4, oem: int = 1):
    config = '--psm {} --oem {}'.format(psm, oem)
    ocr_df = pytesseract.image_to_data(self.img, config=config, output_type=Output.DATAFRAME)
    return TesseractOcrOperand(self, ocr_df)
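# The DATAFRAME output makes it easy to reassemble words into lines with plain
# pandas. A minimal sketch, assuming ocr_df came from the method above:
df = ocr_df[ocr_df.conf > 0].copy()
df['text'] = df['text'].astype(str)
lines = df.groupby(['block_num', 'par_num', 'line_num'])['text'].apply(' '.join)
print(lines.to_list())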
def charsearchdata(what=49, graphic_enable=graphic_on):
    data = pytesseract.image_to_data(gray, config='--psm 13')
    print(data)
ap.add_argument("-i", "--image", required=True,
                help="path to input image to be OCR'd")
ap.add_argument("-c", "--min-conf", type=int, default=0,
                help="minimum confidence value to filter weak text detection")
args = vars(ap.parse_args())

# load the input image, convert it from BGR to RGB channel ordering,
# and use Tesseract to localize each area of text in the input image
image = cv2.imread(args["image"])
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = pytesseract.image_to_data(rgb, output_type=Output.DICT, lang="jpn")

# loop over each of the individual text localizations
for i in range(0, len(results["text"])):
    # extract the bounding box coordinates of the text region from
    # the current result
    x = results["left"][i]
    y = results["top"][i]
    w = results["width"][i]
    h = results["height"][i]
    # extract the OCR text itself along with the confidence of the
    # text localization
    text = results["text"][i]
    conf = int(results["conf"][i])
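    # The snippet ends here; the usual continuation filters weak detections
    # against the --min-conf flag and draws the surviving boxes -- a hedged
    # sketch of that step (args["min_conf"] comes from the parser above):
    if conf > args["min_conf"]:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        print("Confidence: {}  Text: {}".format(conf, text))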
    return ''.join(cuit.groups())
# endregion

# region - "MAIN"
if __name__ == "__main__":
    obtener_puntos()
    niveles = sorted(niveles)
    lineas = obtener_lineas(puntos, niveles)
    obtener_secciones(lineas)
    for sec in range(len(secciones)):
        key = 'Seccion-' + str(sec + 1)
        boxes = pytesseract.image_to_data(secciones[sec])
        ocr_secciones[key] = limpiar_boxes(boxes)
        '''cv2.imshow("Seccion", secciones[sec])
        cv2.waitKey(0)'''
    print(ocr_secciones)
    cv2.destroyAllWindows()
    # endregion

    # region - WORD PROCESSING
    nombre_obtenido = False
    for key in ocr_secciones:
        if len(ocr_secciones[key]) > 3 and (not nombre_obtenido):
            # print(ocr_secciones[key])
            razon_social = obtener_razon_social(ocr_secciones[key])
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = "C:/Program Files (x86)/Tesseract-OCR/tesseract.exe"
img = cv2.imread('1.jpg')
img1 = img.copy()
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# print(pytesseract.image_to_string(img))

# image to boxes (character level; y-coordinates are measured from the bottom)
imx, imy, imc = img.shape
print(imx)
boundary = pytesseract.image_to_boxes(img)
for b in boundary.splitlines():
    b = b.split(' ')
    x, y, w, h = int(b[1]), int(b[2]), int(b[3]), int(b[4])
    cv2.rectangle(img, (x, imx - y), (w, imx - h), (255, 0, 0), 1)
    cv2.putText(img, b[0], (x, imx - y - 20), cv2.FONT_HERSHEY_DUPLEX, 0.34, (0, 0, 0), 1)
cv2.imshow('IMAGE TO LETTERS', img)

# image to data (word level; y-coordinates are measured from the top)
boundary1 = pytesseract.image_to_data(img1)
for c, b1 in enumerate(boundary1.splitlines()):
    if c != 0:  # skip the TSV header row
        b1 = b1.split()
        if len(b1) == 12:  # rows with 12 fields carry recognized text
            x1, y1, w1, h1 = int(b1[6]), int(b1[7]), int(b1[8]), int(b1[9])
            cv2.rectangle(img1, (x1, y1), (w1 + x1, h1 + y1), (255, 0, 0), 1)
            cv2.putText(img1, b1[11], (x1, y1), cv2.FONT_HERSHEY_DUPLEX, 0.34, (0, 0, 0), 1)
cv2.imshow('Image TO DATA', img1)
cv2.waitKey(0)
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
im2 = img.copy()
file = open("recognized.json", "w+")
file.write("[")
file.close()
for idx, cnt in enumerate(contours):
    x, y, w, h = cv2.boundingRect(cnt)
    rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cropped = im2[y:y + h, x:x + w]
    file = open("recognized.json", "a")
    print("[INFO] reading text...")
    text = pytesseract.image_to_data(cropped, output_type=Output.DICT)
    print("[INFO] writing text...")
    file.write(json.dumps(text))
    if idx != len(contours) - 1:
        file.write(",")
    else:
        file.write("]")
    # print(text)
    file.close()
print("[Finished] Data written to the file .\\recognized.json")
def grid_search_adaptive_binarization(img_path,
                                      cv_adaptive_sizes=[15, 25],
                                      cv_adaptive_cs=[10],
                                      cv_adaptive_methods=['ADAPTIVE_THRESH_MEAN_C'],
                                      output=True,
                                      eval_method='mlc',
                                      ground_truth_path=None):
    '''
    Try each size and C value to find the best adaptive binarization params.
    eval_method = [mlc, wer]; 'wer' uses WORD ERROR RATE as the measurement.
    Returns a dict of results.
    '''
    if eval_method not in ['mlc', 'wer']:
        print('error: invalid eval_method')
        return False
    if eval_method == 'wer' and ground_truth_path is not None:
        # load the ground truth
        try:
            ground_truth = open(ground_truth_path, 'r').read()
        except Exception:
            print('error: cannot load ground truth from .txt')
    if eval_method == 'wer' and ground_truth_path is None:
        print('error: please provide a ground truth text if evaluation method == wer')
        return False

    import time
    import numpy as np

    print_time_once = True
    gridsearch_result = []
    for method in cv_adaptive_methods:
        for c in cv_adaptive_cs:
            for size in cv_adaptive_sizes:
                start_time = time.time()
                # apply threshold
                img_bin_adapt = adaptive_binary(img_path, size, c, method)
                # apply ocr
                df_data = pytesseract.image_to_data(img_bin_adapt, output_type='data.frame')
                # measurement
                if eval_method == 'mlc':
                    try:
                        length, text, measure = image_to_data_stats(df_data, output=False)
                    except Exception as e:
                        print('error: mean line confidence', e)
                        measure = np.nan
                        length = np.nan
                elif eval_method == 'wer':
                    try:
                        hypothesis = ' '.join(df_data[df_data['text'].notnull()].text.to_list())
                        length, _, _ = image_to_data_stats(df_data, output=False)
                        measure = word_error_rate(ground_truth, hypothesis)
                    except Exception:
                        measure = np.nan
                        length = np.nan
                # add grid-search data
                gridsearch_result.append({
                    'length': length,
                    eval_method: measure,
                    'size': size,
                    'c': c,
                    'method': method
                })
                # time one iteration and report the estimated total once
                time_end = round((time.time() - start_time), 2)
                if print_time_once:
                    cv_iterations = len(cv_adaptive_sizes) * len(cv_adaptive_cs) * len(cv_adaptive_methods)
                    print('eval_method:', eval_method, ' nr. of iterations:', cv_iterations,
                          'est. time (min):', round(cv_iterations * time_end / 60, 2))
                    print_time_once = False
                if output:
                    print('GS>', 'method', method, 'size', size, 'c', c,
                          eval_method, measure, '\t time:', time_end)
                    plot_img(img_bin_adapt, 150)
                    plt.pause(0.05)
    '''return dict and best index with best params'''
    if eval_method == 'mlc':
        best_idx, best_params = max(enumerate(gridsearch_result),
                                    key=lambda item: item[1][eval_method])
    elif eval_method == 'wer':
        best_idx, best_params = min(enumerate(gridsearch_result),
                                    key=lambda item: item[1][eval_method])
    return gridsearch_result, best_idx, best_params
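# A hypothetical call (adaptive_binary, image_to_data_stats, word_error_rate
# and plot_img are helpers assumed to exist in the same module; the image path
# is a placeholder):
results, best_idx, best = grid_search_adaptive_binarization(
    'scan.png',
    cv_adaptive_sizes=[11, 15, 25, 35],
    cv_adaptive_cs=[5, 10],
    eval_method='mlc',
    output=False)
print(best['size'], best['c'], best['method'])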
def img_to_data(img):
    return pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
def get_data(im):
    return pytesseract.image_to_data(im, output_type=Output.DICT)
img = cv.imread(root_path + filename)
if img is None:
    print("Error opening video stream or file")
    sys.exit()
img_thresholding = img.copy()

import pytesseract
from pytesseract import Output

# custom_config = r'--oem 3 --psm 6'
custom_config = r'--oem 3 --psm 6 -l kor+kor_vert+eng'  # Korean + English
words_string = pytesseract.image_to_string(img)
words = pytesseract.image_to_data(img, config=custom_config, output_type=Output.DICT)
print(words.keys())
n_boxes = len(words['text'])
for i in range(n_boxes):
    if int(words['conf'][i]) > 60:
        (x, y, w, h) = (words['left'][i], words['top'][i], words['width'][i], words['height'][i])
        img = cv.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv.imshow('Resource', img)


# thresholding
def thresholding(image):
    return cv.threshold(image, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)[1]
# body of the frame-reading loop (vidcap is a cv2.VideoCapture opened earlier)
if count > 1000:
    break
# cv2.imwrite("frame%d.jpg" % count, image)  # save frame as JPEG file
success, image = vidcap.read()
# print('Read a new frame: ', success)
if success:
    count += 1
    if count > (13 * 30) and count % 50 == 0:
        try:
            img = image[650:702, 40:155]
            img = get_grayscale(img)
            # Show analysed image
            # cv2.imshow('img', img)
            # cv2.waitKey(0)
            d = pytesseract.image_to_data(img, lang="eng", config=custom_config,
                                          output_type=Output.DICT)
            print(d['text'])
            n_boxes = len(d['text'])
            for i in range(n_boxes):
                matches = ["REPLAY", "REPL", "REP", "PLAY", "EPL", "PLA"]
                if any(x in d['text'][i] for x in matches):
                    print("Goal scored detected at frame: {}".format(count))
                    goalDetected = True
                    videosWithConfirmedGoal += 1
                    detectedGoalList.append([v, count])
                    # Overlay bounding boxes
                    # if int(d['conf'][i]) > 60:
# Imports needed by this function.
import os
from io import StringIO

import pandas
import pytesseract
from numpy import random
from PIL import Image


def tsvOCR(img, savpath, tsvfile, p=1, append=True, psm=1, oem=3):
    """
    Run OCR on an image and produce a TSV file along with text. Can limit
    which files are added to the TSV by way of binomial sampling.

    Arguments
    ----------------------------------------------------------------------
    img (str, PIL image) : Either the file path of an image or a PIL image
        object.
    savpath (str) : The file path to save the text output.
    tsvfile (str) : Name of a TSV file that will be produced in the same
        directory as the text output. Ignored if not specified.
    p : Probability to use for binomial sampling. If 1, no sample will be
        taken.
    append (boolean) : If true, text output is appended to the file
        specified by savpath. If the file does not exist, it will be
        created. Note: this argument also applies to the TSV output if a
        TSV file is specified.
    psm (int) : Tesseract configuration for Page Segmentation Mode.
        https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc
    oem (int) : Tesseract configuration for OCR Engine Mode.
        https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc

    Returns
    ----------------------------------------------------------------------
    none
    """
    # if a filename is used for the image, load the image
    if type(img) == str:
        name = os.path.split(img)[1]
        img = Image.open(img)
    else:
        name = img.info["name"]
    # Set up tesseract config
    if 0 <= psm <= 13:
        pconfig = "--psm " + str(psm)
    else:
        return "Invalid tesseract config."
    if 0 <= oem <= 3:
        oconfig = "--oem " + str(oem)
    else:
        return "Invalid tesseract config."
    tconfig = pconfig + " " + oconfig
    # determine mode to save files
    if append == True:
        mode = {"mode": "a"}
    else:
        mode = {"mode": "w"}
    # Get TSV data
    tsvs = pytesseract.image_to_data(img, config=tconfig)
    tdf = pandas.read_csv(StringIO(tsvs), sep="\t", engine="python", error_bad_lines=False)
    # Turn TSV data into text
    par = 0
    line = 0
    text = ""
    for row in tdf.itertuples():
        token = str(row.text)
        # adjust tokens
        if token == "nan":
            continue
        if row.par_num != par:
            par = row.par_num
            line = row.line_num
            token = "\n\n" + token + " "
        elif row.line_num != line:
            line = row.line_num
            token = "\n" + token + " "
        else:
            token = token + " "
        # compile text
        text = text + token
    # write text to file
    ocrf = open(savpath, **mode)
    ocrf.write(text)
    ocrf.close()
    # Drop TSV columns we don't need
    tdf = tdf.drop(columns=["level", "page_num", "block_num", "par_num", "line_num", "word_num"])
    # Sample TSV data
    if p != 1:
        samp = random.binomial(size=tdf.count()[0], n=1, p=p)
        r = 0
        for s in samp:
            if s == 0:
                tdf = tdf.drop(r)
            r += 1
    # if we aren't left with an empty data frame
    if tdf.count()[0] != 0:
        # add a column for the image name
        tdf["name"] = name
        # determine if a header is needed
        dirpath = os.path.split(savpath)[0]
        if append == True and os.path.exists(os.path.normpath(os.path.join(dirpath, tsvfile))) == True:
            header = False
        else:
            header = True
        # write data to TSV file
        tsvf = open(os.path.normpath(os.path.join(dirpath, tsvfile)), **mode)
        tdf.to_csv(tsvf, index=False, sep="\t", header=header)
        tsvf.close()
    return
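# A hypothetical call to tsvOCR, sampling roughly half the word rows into the
# TSV (the paths are placeholders):
tsvOCR('scan_001.png', savpath='./out/ocr.txt', tsvfile='ocr.tsv',
       p=0.5, append=True, psm=3, oem=3)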
def getPdfTextData():
    # Scan the current directory for the .pdf file that will be converted
    for file_ in os.scandir(os.getcwd()):
        if file_.name.endswith('.pdf'):
            filenames.append(file_.name)
    # Load the .pdf file and save it as .png ('Image' here is wand.image.Image)
    DPI = 600
    pdf = Image(filename=filenames[0], resolution=DPI)
    png_files = []
    for page_num, page in enumerate(pdf.sequence):
        page = Image(page)
        page = page.convert('png')
        temp_file_name = 'temp_' + str(page_num) + '.png'
        png_files.append(temp_file_name)
        page.save(filename=temp_file_name)
    # Load the .png files and combine them to make one image
    img_arrays = []
    # if len(png_files) > 1:
    for file_ in png_files:
        # open the image file
        img = cv2.imread(file_)
        sz = img.shape
        start_row = round(0.05 * sz[0])
        end_row = sz[0] - start_row
        # convert the image to a numpy array
        img_arrays.append(np.array(img))  # [start_row:end_row, :, :]
        # delete the temporary png file that we just read
        os.remove(file_)
    final_img_arr = np.vstack(img_arrays)
    img = PIL.Image.fromarray(final_img_arr).convert('L')
    threshold = 200
    img = img.point(lambda p: p > threshold and 255)
    img.show()
    # this is where we will temporarily store the extracted text
    tsv_filename = "text.tsv"
    # Get TSV information about the text and write the temp file
    # --psm 3, --psm 6 or --psm 11 all work well
    custom_oem_psm_config = r'--psm 3'
    text = pytesseract.image_to_data(img, config=custom_oem_psm_config)
    file_object = open(tsv_filename, 'w')
    file_object.write(text)
    file_object.close()
    # Read the TSV file into a pandas dataframe
    csv_table = pd.read_table(tsv_filename, sep='\t')
    # delete the TSV file we created above now that the data is read into pandas
    os.remove(tsv_filename)
    # Drop the rows with NaN values
    csv_table = csv_table.dropna()
    csv_table = csv_table.reset_index(drop=True)
    # Clean the page headers/footers.
    # DOCUMENT TYPE 1:
    #   "Physician Referral - Print View  https://prs.choosebroadspire.com/Template.aspx?PRSId=20191115..." (top)
    #   "1 of 4  11/15/19, 2:22 PM" (always on bottom)
    # DOCUMENT TYPE 2:
    #   "11/8/2019  Physician Referral - Print View" (always on top)
    #   "https://prs.choosebroadspire.com/Template.a...  1/2" (always on bottom)
    # Remove timestamps, datestamps, and website stamps.
    # Find and drop the lines of text that match regular expressions like:
    #   - reg_expr = 'http://%'
    #   - reg_expr = '[0-1][0-9]/[0-3][0-9]/[0-9][0-9], [0-12]+:[0-5][0-9] [AP]M'
    print(csv_table)
    # if the document is of type 1
    if csv_table.text.values[0] == 'Physician':
        cond_line1 = csv_table.text.str.contains('https://.*', regex=True)
        cond_line2 = csv_table.text.str.contains('[0-9]{1,2}/[0-9]{1,2}/[0-9]{1,2},', regex=True)
        line1_tops = csv_table[cond_line1].top.values
        line1_heights = csv_table[cond_line1].height.values
        line2_tops = csv_table[cond_line2].top.values
        line2_heights = csv_table[cond_line2].height.values
        # now that we have the top of the bounding boxes for each line,
        # search all other entries in the data frame that have tops in a
        # small interval around the top of the lines of interest
        for top1, top2, height1, height2 in zip(line1_tops, line2_tops,
                                                line1_heights, line2_heights):
            cond11 = csv_table['top'].ge(top1 - height1 * 0.7)
            cond12 = csv_table['top'].le(top1 + height1 * 0.7)
            line1_table = csv_table[cond11 & cond12]
            # get the indices for the table above and drop them from the final table
            cond21 = csv_table['top'].ge(top2 - height2 * 0.7)
            cond22 = csv_table['top'].le(top2 + height2 * 0.7)
            line2_table = csv_table[cond21 & cond22]
            line1_indices = line1_table.index.values
            line2_indices = line2_table.index.values
            csv_table = csv_table.drop(index=line1_indices)
            csv_table = csv_table.drop(index=line2_indices)
            csv_table = csv_table.reset_index(drop=True)
    # if the document is of type 2
    if csv_table.head(1).text.str.contains('[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}', regex=True).values[0]:
        # Because type-2 documents have header lines containing dates that we want to remove,
        # we need to be careful not to delete other lines within the document that contain dates.
        # To do this, we find all strings that have the date form and filter those by adding the
        # constraint that the next index in the table should be 'Physician Referral'.
        # get a table with all matches for dates
        dates_cond = csv_table.text.str.contains('[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}', regex=True)
        dates_indices = csv_table[dates_cond].index.values
        dates_indices1 = dates_indices.tolist()
        line1_tops = []
        line1_heights = []
        for index in dates_indices:
            if csv_table.at[index + 1, 'text'] == 'Physician' and \
                    csv_table.at[index + 2, 'text'] == 'Referral':
                line1_tops.append(csv_table.at[index, 'top'])
                line1_heights.append(csv_table.at[index, 'height'])
            else:
                continue
        cond_line2 = csv_table.text.str.contains('https://.*', regex=True)
        line2_tops = csv_table[cond_line2].top.values
        line2_heights = csv_table[cond_line2].height.values
        # now that we have the top of the bounding boxes for each line,
        # search all other entries in the data frame that have tops in a
        # small interval around the top of the lines of interest
        print('********')
        print(line1_tops)
        print(line1_heights)
        print(line2_tops)
        print(line2_heights)
        for top1, top2, height1, height2 in zip(line1_tops, line2_tops,
                                                line1_heights, line2_heights):
            print('****')
            cond11 = csv_table['top'].ge(top1 - height1 * 0.7)
            cond12 = csv_table['top'].le(top1 + height1 * 0.7)
            line1_table = csv_table[cond11 & cond12]
            line1_indices = line1_table.index.values
            print(line1_indices)
            csv_table = csv_table.drop(index=line1_indices)
            csv_table = csv_table.reset_index(drop=True)
            # get the indices for the table above and drop them from the final table
            cond21 = csv_table['top'].ge(top2 - height2 * 0.7)
            cond22 = csv_table['top'].le(top2 + height2 * 0.7)
            line2_table = csv_table[cond21 & cond22]
            line2_indices = line2_table.index.values
            print(line2_indices)
            csv_table = csv_table.drop(index=line2_indices)
            csv_table = csv_table.reset_index(drop=True)
    return csv_table
def check(path=r'./imgs/300/Sample11-0.jpg', dpi=300):
    for p in range(11, 12):
        for o in range(3, 4):
            img = cv2.imread('./imgs/200/Sample21-0.jpg')
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img1 = copy.deepcopy(img)
            # temp = sys.stdout
            # f = open('out.txt', 'w')
            # sys.stdout = f
            d = pytesseract.image_to_data(
                img,
                output_type=pytesseract.Output.DICT,
                lang='eng',
                config='-c tessedit_char_whitelist="$%@.,&():ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 " --psm {} --oem {}'.format(p, o))
            n_boxes = len(d['level'])
            print(d)
            # f.close()
            # sys.stdout = temp
            print('Read image')
            try:
                width = img.shape[1]
                height = img.shape[0]
                fps = 1
                font = cv2.FONT_HERSHEY_SIMPLEX
                thickness = 2
                color = (0, 0, 255)
                fontscale = 1
                org = (width - 1000, height - 100)
                org1 = (width - 1000, height - 200)
                # fourcc = VideoWriter_fourcc(*'XVID')
                # video = VideoWriter('tess_with_sp_psm_{}_oem_{}.avi'.format(p, o), fourcc, float(fps), (width, height))
                for i in range(n_boxes):
                    # if d['width'][i] <= 10 or d['height'][i] <= 10:
                    #     continue
                    if d['text'][i] == '':
                        continue
                    (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
                    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    img1 = img1[y - 5:y + h + 5, x - 5:x + w + 5]
                    print(pytesseract.image_to_string(
                        img1,
                        output_type=pytesseract.Output.DICT,
                        lang='eng',
                        config='-c tessedit_char_whitelist="$%@.,&():ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 " --psm {} --oem {}'.format(p, o)))
                    cv2.imwrite('temp/{}.jpg'.format(i), img1)
                    img1 = copy.deepcopy(img)
                    text = 'w_{}_h_{}'.format(d['width'][i], d['height'][i])
                    image = cv2.putText(img1, text, org, font, fontscale, color, thickness, cv2.LINE_AA)
                    image = cv2.putText(img1, d['text'][i], org1, font, fontscale, color, thickness, cv2.LINE_AA)
                    # video.write(img1)
                cv2.imwrite('./{}_tess_with_sp_psm_{}_oem_{}.jpg'.format(dpi, p, o), img)
                print('image saved')
            except Exception as e:
                print("{} {}".format(p, o))
                print(e)
im_resized = im.resize(size, Image.ANTIALIAS)
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
temp_filename = temp_file.name
im_resized.save(temp_filename, dpi=(300, 300))

# pytesseract
# config = "-l eng --oem 1 --psm 6 -c preserve_interword_spaces=1x1 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
config = "-l eng --oem 1 --psm 6 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
text = pytesseract.image_to_string(temp_filename, config=config)
print(text)
rowtext.append(text)

# pytesseract score retrieval
text_score = pytesseract.image_to_data(cleaned_img, output_type='data.frame', config=config)
text_score = text_score[text_score.conf != -1]
lines = text_score.groupby(['block_num'])['text'].apply(list)
conf = list(text_score.groupby(['block_num'])['conf'].mean())
conf_score_list.append(conf)

# CRNN_Sparse
# uncomment the three lines below (and disable the pytesseract block above)
# to use the CRNN model instead
# text = textdetector(temp_filename, model_crnnspar)
# print(text)
# rowtext.append(text)

csv_writer.writerow(rowtext)
row_para.append(rowtext)
csvfile.close()
from wand.image import Image as wand_Image
from PIL import Image as PIL_Image
from wand.color import Color
import os
import pytesseract


def build_images(force=False):
    if not all([not force,
                os.path.isfile('./foo-0.png'),
                os.path.isfile('./foo-1.png')]):
        all_pages = wand_Image(filename='./example.pdf', resolution=300)
        for idx, page in enumerate(all_pages.sequence):
            with wand_Image(page) as i:
                i.format = 'png'
                i.background_color = Color('white')
                i.alpha_channel = 'remove'
                i.save(filename='foo-%s.png' % idx)


build_images()
boxes = pytesseract.image_to_boxes(PIL_Image.open('foo-0.png'), output_type=pytesseract.Output.DICT)
data = pytesseract.image_to_data(PIL_Image.open('foo-0.png'), output_type=pytesseract.Output.DICT)
osd = pytesseract.image_to_osd(PIL_Image.open('foo-0.png'), output_type=pytesseract.Output.DICT)
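# image_to_osd returns orientation/script metadata; the DICT output exposes
# keys such as 'rotate', 'orientation' and 'script' (exact keys can vary with
# the pytesseract version), so a page can be deskewed before running
# image_to_data. A sketch under that assumption:
if osd.get('rotate'):
    page = PIL_Image.open('foo-0.png').rotate(-osd['rotate'], expand=True)
    data = pytesseract.image_to_data(page, output_type=pytesseract.Output.DICT)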