def test_image_to_data__pandas_support(test_file_small):
    with pytest.raises(TSVNotSupported):
        image_to_data(test_file_small, output_type=Output.DATAFRAME)
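
For contrast, a minimal sketch of the success path when pandas is installed (the file name is illustrative):

import pytesseract
from pytesseract import Output

df = pytesseract.image_to_data('sample.png', output_type=Output.DATAFRAME)
print(df.loc[df['conf'] > 60, ['text', 'left', 'top', 'width', 'height']])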
Example #2
import cv2
import pytesseract
from pytesseract import Output

img = cv2.imread('page_1.jpg')

d = pytesseract.image_to_data(img, output_type=Output.DICT)
print(d.keys())

n_boxes = len(d['text'])
for i in range(n_boxes):
    if int(d['conf'][i]) > 60:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i],
                        d['height'][i])
        img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv2.imwrite("C:/New folder/New Vision Soft/OCR/ITR/box_img.jpg", img)

cv2.imshow('img', img)
cv2.waitKey(0)
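
One caveat worth noting (a sketch, not part of the original): recent Tesseract builds can report conf as a float string such as '96.06', so int(d['conf'][i]) may raise ValueError; casting through float is safer.

for i in range(len(d['text'])):
    if float(d['conf'][i]) > 60:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i],
                        d['height'][i])
        img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)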
Example #3
import argparse
import os
import re
import time

import cv2
import numpy as np
import pytesseract
from imutils.video import VideoStream
from pytesseract import Output
from scipy import ndimage

# Tuning constants used by the helpers below; the original snippet relied on
# globals that were not shown, so these defaults are illustrative assumptions.
BINARY_THREHOLD = 180
weak_pixel = 75
strong_pixel = 255
lowthreshold = 0.05
highthreshold = 0.15


def camIdMotor():

    font = cv2.FONT_HERSHEY_SIMPLEX

    def image_smoothening(img):
        ret1, th1 = cv2.threshold(img, BINARY_THREHOLD, 255, cv2.THRESH_BINARY)
        ret2, th2 = cv2.threshold(th1, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        blur = cv2.GaussianBlur(th2, (1, 1), 0)
        ret3, th3 = cv2.threshold(blur, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return th3

    def remove_noise_and_smooth(img):
        print('Removing noise and smoothening image')
        # img = cv2.imread(file_name, 0)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        filtered = cv2.adaptiveThreshold(gray.astype(np.uint8), 255,
                                         cv2.ADAPTIVE_THRESH_MEAN_C,
                                         cv2.THRESH_BINARY, 41, 3)
        kernel = np.ones((1, 1), np.uint8)
        opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
        img = image_smoothening(gray)
        or_image = cv2.bitwise_or(gray, closing)
        return or_image

    def gaussian_kernel(size, sigma=1):
        size = int(size) // 2
        x, y = np.mgrid[-size:size + 1, -size:size + 1]
        normal = 1 / (2.0 * np.pi * sigma**2)
        g = np.exp(-((x**2 + y**2) / (2.0 * sigma**2))) * normal
        return g

    def sobel_filters(img):
        Kx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], np.float32)
        Ky = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]], np.float32)

        Ix = ndimage.convolve(img, Kx)
        Iy = ndimage.convolve(img, Ky)

        G = np.hypot(Ix, Iy)
        G = G / G.max() * 255
        theta = np.arctan2(Iy, Ix)
        return (G, theta)

    def non_max_suppression(img, D):
        M, N = img.shape
        Z = np.zeros((M, N), dtype=np.int32)
        angle = D * 180. / np.pi
        angle[angle < 0] += 180

        for i in range(1, M - 1):
            for j in range(1, N - 1):
                try:
                    q = 255
                    r = 255

                    # angle 0
                    if (0 <= angle[i, j] < 22.5) or (157.5 <= angle[i, j] <=
                                                     180):
                        q = img[i, j + 1]
                        r = img[i, j - 1]
                    # angle 45
                    elif (22.5 <= angle[i, j] < 67.5):
                        q = img[i + 1, j - 1]
                        r = img[i - 1, j + 1]
                    # angle 90
                    elif (67.5 <= angle[i, j] < 112.5):
                        q = img[i + 1, j]
                        r = img[i - 1, j]
                    # angle 135
                    elif (112.5 <= angle[i, j] < 157.5):
                        q = img[i - 1, j - 1]
                        r = img[i + 1, j + 1]

                    if (img[i, j] >= q) and (img[i, j] >= r):
                        Z[i, j] = img[i, j]
                    else:
                        Z[i, j] = 0

                except IndexError as e:
                    pass

        return Z

    def threshold(img):

        highThreshold = img.max() * highthreshold
        lowThreshold = highThreshold * lowthreshold

        M, N = img.shape
        res = np.zeros((M, N), dtype=np.int32)

        weak = np.int32(weak_pixel)
        strong = np.int32(strong_pixel)

        strong_i, strong_j = np.where(img >= highThreshold)
        zeros_i, zeros_j = np.where(img < lowThreshold)

        weak_i, weak_j = np.where((img <= highThreshold)
                                  & (img >= lowThreshold))

        res[strong_i, strong_j] = strong
        res[weak_i, weak_j] = weak

        return (res)

    def hysteresis(img):

        M, N = img.shape
        weak = weak_pixel
        strong = strong_pixel

        for i in range(1, M - 1):
            for j in range(1, N - 1):
                if (img[i, j] == weak):
                    try:
                        if ((img[i + 1, j - 1] == strong)
                                or (img[i + 1, j] == strong)
                                or (img[i + 1, j + 1] == strong)
                                or (img[i, j - 1] == strong)
                                or (img[i, j + 1] == strong)
                                or (img[i - 1, j - 1] == strong)
                                or (img[i - 1, j] == strong)
                                or (img[i - 1, j + 1] == strong)):
                            img[i, j] = strong
                        else:
                            img[i, j] = 0
                    except IndexError as e:
                        pass

        return img

    def detectarPlaca(img):

        ##TESTING
        # blurred_image = gaussian_blur(image, kernel_size=9, verbose=False)

        # img_smoothed = convolve(img, gaussian_kernel(kernel_size, sigma))

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # gradientMat, thetaMat = sobel_filters(img_smoothed)
        # nonMaxImg = non_max_suppression(gradientMat, thetaMat)
        # thresholdImg = threshold(nonMaxImg)
        # img_final = hysteresis(thresholdImg)

        # imgNoise = remove_noise_and_smooth(img)
        # grayNew= unsharp_mask(img)
        # im_resized(img, dpi=(300, 300))  # best for OCR
        # img = cv2.resize(img, (300, 300))
        # img = cv2.medianBlur(img, 5)

        # gray = cv2.blur(gray, (5, 5))
        # gray= cv2.medianBlur(gray, 5)
        # gray= img.gaussian_kernel(5)
        gray = cv2.GaussianBlur(gray, (5, 5), 0)

        # gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

        # ret, thresh1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
        # ret, thresh2 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
        # ret, thresh3 = cv2.threshold(gray, 127, 255, cv2.THRESH_TRUNC)
        # ret, thresh4 = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
        # ret, thresh5 = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO_INV)

        ## OTHER
        # th2 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
        # th3 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
        # ret3, th3 = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        canny = cv2.Canny(gray, 50, 150)
        # kernel = np.ones((5, 5), np.uint8)

        canny = cv2.dilate(canny, None, iterations=2)
        # canny = cv2.erode(canny, None, iterations=1)
        kernel = np.ones((5, 5), np.uint8)

        # canny = cv2.morphologyEx(canny, cv2.MORPH_CLOSE, None)
        # canny = cv2.morphologyEx(canny, cv2.MORPH_GRADIENT,None)
        # canny = cv2.dilate(canny, None, iterations=2)
        # canny= cv2.bitwise_not(canny)
        # canny = cv2.erode(canny, kernel, iterations=1)

        return canny

    def unsharp_mask(image,
                     kernel_size=(5, 5),
                     sigma=1.0,
                     amount=1.0,
                     threshold=0):
        """Return a sharpened version of the image, using an unsharp mask."""
        blurred = cv2.GaussianBlur(image, kernel_size, sigma)
        sharpened = float(amount + 1) * image - float(amount) * blurred
        sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))
        sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))
        sharpened = sharpened.round().astype(np.uint8)
        if threshold > 0:
            low_contrast_mask = np.absolute(image - blurred) < threshold
            np.copyto(sharpened, image, where=low_contrast_mask)
        return sharpened

    def binarizacion(img):
        # img = cv2.imread('gradient.png', 0)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        ret, thresh1 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
        ret, thresh2 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
        ret, thresh3 = cv2.threshold(img, 127, 255, cv2.THRESH_TRUNC)
        ret, thresh4 = cv2.threshold(img, 127, 255, cv2.THRESH_TOZERO)
        ret, thresh5 = cv2.threshold(img, 127, 255, cv2.THRESH_TOZERO_INV)
        # titles = ['Original Image', 'BINARY', 'BINARY_INV', 'TRUNC', 'TOZERO', 'TOZERO_INV']
        # images = [img, thresh1, thresh2, thresh3, thresh4, thresh5]
        return thresh5

    def conTornos(src):

        # gris = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        # Apply Gaussian smoothing
        # gauss = cv2.GaussianBlur(gris, (5, 5), 0)
        # canny = cv2.Canny(gauss, 50, 150)

        # imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # ret, thresh = cv2.threshold(imgray, 127, 255, 0)
        # im2, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        # gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        # gray = cv2.GaussianBlur(gray, (7, 7), 3)

        # t, dst = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_TRIANGLE)

        # gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        # _, th = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
        # canny = cv2.Canny(th, 50, 150)
        # img1, contornos1, hierarchy1 = cv2.findContours(th, cv2.RETR_EXTERNAL,
        #                                                cv2.CHAIN_APPROX_NONE)
        # img2, contornos2, hierarchy2 = cv2.findContours(th, cv2.RETR_EXTERNAL,
        #                                                cv2.CHAIN_APPROX_SIMPLE)
        # contornos1, hierarchy1 = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

        gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        # gray = cv2.blur(gray, (3, 3))
        gray = cv2.bilateralFilter(gray, 11, 17, 17)  # Blur to reduce noise
        th3 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, \
                                    cv2.THRESH_BINARY, 11, 2)
        canny = cv2.Canny(gray, 150, 200)
        # canny = cv2.dilate(canny, None, iterations=1)
        canny = cv2.morphologyEx(canny, cv2.MORPH_OPEN, None)

        return canny

    def maskNew2(img):

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # grayscale
        _, thresh = cv2.threshold(gray, 150, 255,
                                  cv2.THRESH_BINARY_INV)  # threshold
        kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
        dilated = cv2.dilate(thresh, kernel, iterations=13)  # dilate

        return dilated

    def maskNew1(img):
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        #canny = cv2.Canny(gray_img, 100, 200)
        # gray_img = canny
        ret1, th1 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY)

        # th3 = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
        # global thresholding
        ret1, th1 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY)

        # Otsu's thresholding
        ret2, th2 = cv2.threshold(gray_img, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Otsu's thresholding after Gaussian filtering
        blur = cv2.GaussianBlur(gray_img, (5, 5), 0)
        ret3, th3 = cv2.threshold(blur, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        ##other
        ret, thresh1 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY)
        ret, thresh2 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY_INV)
        ret, thresh3 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TRUNC)
        ret, thresh4 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TOZERO)
        ret, thresh5 = cv2.threshold(gray_img, 127, 255, cv2.THRESH_TOZERO_INV)

        return th2

    def rescalingNew(imgNew):
        img = cv2.resize(imgNew,
                         None,
                         fx=1.2,
                         fy=1.2,
                         interpolation=cv2.INTER_CUBIC)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # kernel = np.ones((1, 1), np.uint8)
        # img = cv2.dilate(img, kernel, iterations=1)
        # img = cv2.erode(img, kernel, iterations=1)
        # cv2.threshold(cv2.bilateralFilter(img, 5, 75, 75), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        # cv2.threshold(cv2.medianBlur(img, 3), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        # cv2.adaptiveThreshold(cv2.bilateralFilter(img, 9, 75, 75), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        #                       cv2.THRESH_BINARY, 31, 2)
        cv2.adaptiveThreshold(cv2.medianBlur(img, 3), 255,
                              cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                              cv2.THRESH_BINARY, 31, 2)

        return img

    def maskNew(img):
        frame = cv2.resize(img, (400, 300), interpolation=cv2.INTER_CUBIC)
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        lowerBlue = np.array([100, 50, 50])
        upperBlue = np.array(
            [130, 255, 255]
        )  # cv2.adaptiveThreshold(cv2.medianBlur(img, 3), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)

        mask = cv2.inRange(hsv, lowerBlue, upperBlue)
        return mask

    def detectaCode(img):
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_img = cv2.bitwise_not(gray_img)
        return gray_img

    def umbralChangeBack(img):
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray_img, 127, 255,
                                  cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        img_contours = cv2.findContours(thresh, cv2.RETR_TREE,
                                        cv2.CHAIN_APPROX_SIMPLE)[-2]
        img_contours = sorted(img_contours, key=cv2.contourArea)

        for i in img_contours:

            if cv2.contourArea(i) > 100:
                break

        mask = np.zeros(img.shape[:2], np.uint8)
        cv2.drawContours(mask, [i], -1, 255, -1)
        new_img = cv2.bitwise_and(img, img, mask=mask)
        return new_img

    def contrast(img):
        return cv2.addWeighted(img, 1.5, np.zeros(img.shape, img.dtype), 0, 0)

    def focusing(val):
        value = (val << 4) & 0x3ff0
        data1 = (value >> 8) & 0x3f
        data2 = value & 0xf0
        os.system("i2cset -y 0 0x0c %d %d" % (data1, data2))

    def sobel(img):
        img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        img_sobel = cv2.Sobel(img_gray, cv2.CV_16U, 1, 1)
        return cv2.mean(img_sobel)[0]

    def laplacian(img):
        img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        img_sobel = cv2.Laplacian(img_gray, cv2.CV_16U)
        return cv2.mean(img_sobel)[0]

    # get grayscale image
    def get_grayscale(image):
        img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        ret, thresh2 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
        # Apply Gaussian smoothing
        gauss = cv2.GaussianBlur(thresh2, (5, 5), 0)
        # canny = cv2.Canny(gauss, 100, 200)
        # canny= auto_canny(img)
        return gauss

    def get_grayscaleNEW(image):
        return cv2.bilateralFilter(image, 11, 17, 17)

    # noise removal
    def remove_noise(image):
        return cv2.medianBlur(image, 5)

    # thresholding
    def thresholding(image):
        return cv2.threshold(image, 0, 255,
                             cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    def thresHolding(img):
        return cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

    # dilation
    def dilate(image):
        kernel = np.ones((5, 5), np.uint8)
        return cv2.dilate(image, kernel, iterations=1)

    # erosion
    def erode(image):
        kernel = np.ones((5, 5), np.uint8)
        return cv2.erode(image, kernel, iterations=1)

    # opening - erosion followed by dilation
    def opening(image):
        kernel = np.ones((5, 5), np.uint8)
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

    # canny edge detection
    def canny(image):
        return cv2.Canny(image, 100, 200)

    # skew correction

    def auto_canny(image, sigma=0.33):
        # compute the median of the single channel pixel intensities
        v = np.median(image)
        # apply automatic Canny edge detection using the computed median
        lower = int(max(0, (1.0 - sigma) * v))
        upper = int(min(255, (1.0 + sigma) * v))
        edged = cv2.Canny(image, lower, upper)
        # return the edged image
        return edged

    def deskew(image):
        coords = np.column_stack(np.where(image > 0))
        angle = cv2.minAreaRect(coords)[-1]
        if angle < -45:
            angle = -(90 + angle)

        else:
            angle = -angle
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        rotated = cv2.warpAffine(image,
                                 M, (w, h),
                                 flags=cv2.INTER_CUBIC,
                                 borderMode=cv2.BORDER_REPLICATE)
        return rotated

    # template matching
    def match_template(image, template):
        return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

    ap = argparse.ArgumentParser()
    ap.add_argument("-v", "--video", help="path to the (optional) video file")
    args = vars(ap.parse_args())

    # if the video path was not supplied, grab the reference to the
    # camera
    if not args.get("video", False):
        vs = VideoStream(src=0).start()
        # vs = cv2.VideoCapture(0)
        time.sleep(2.0)

    # otherwise, load the video
    else:
        vs = cv2.VideoCapture(args["video"])

    # keep looping over the frames
    findText = None
    while True:

        # check to see if we have reached the end of the
        # video
        # time.sleep(2.0)
        frame = vs.read()
        frame = frame[1] if args.get("video", False) else frame

        if frame is None:
            break

        # length = int(vs.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT))
        # width = int(vs.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH))
        # height = int(vs.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT))
        # fps = vs.get(cv2.cv.CV_CAP_PROP_FPS)
        # grab the current frame and then handle if the frame is returned
        # from either the 'VideoCapture' or 'VideoStream' object,
        # respectively

        # frame = thresholding(frame)
        # frame =  opening(frame)
        # frame = canny(frame)

        # frame = canny(frameOriginal)
        # frameModificado = get_grayscale(frame)
        # frameModificado = binarizacion(frame)
        frameModificado = detectarPlaca(frame)
        # frame= image_smoothening(frame)
        # frame= auto_canny(frame)

        # frame = sobel(frame)
        # frame = laplacian(frame)
        # frame = umbralChangeBack(frame)
        # frame = detectaCode(frame)

        # frame = get_grayscale(frame)
        # frame = thresHolding(frame)
        # frame = maskNew1(frame)

        # frame = contrast(frame)
        # frame = conTornos(frame)
        # frame = maskNew1(frame)
        # frame= rescalingNew(frame)

        # contornos1,hierarchy1 = cv2.findContours(frameOriginal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

        cnts, _ = cv2.findContours(frameModificado, cv2.RETR_LIST,
                                   cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 4

        # print('Number of contours found: ', len(cnts))

        for c in cnts:
            area = cv2.contourArea(c)
            if area > 1500 and area < 6000:
                # print(area)
                # nuevoContorno = cv2.convexHull(c)

                perimeter = cv2.arcLength(c, True)
                epsilon = 0.1 * cv2.arcLength(c, True)
                approx = cv2.approxPolyDP(c, epsilon, True)
                hull = cv2.convexHull(c)

                M = cv2.moments(c)
                if (M["m00"] == 0): M["m00"] = 1
                x = int(M["m10"] / M["m00"])
                y = int(M['m01'] / M['m00'])

                cv2.drawContours(frame, [approx], 0, (255, 0, 0), 3)
                # cv2.drawContours(frame, c, -1, (0, 0, 255), 3)
                # Draw the center
                cv2.circle(frame, (x, y), 3, (0, 0, 255), -1)
                # cv2.drawContours(frame, cnts, -1, (0, 255, 0), 2)
                # epsilon = 0.01 * cv2.arcLength(c, True)
                # approx = cv2.approxPolyDP(c, epsilon, True)
                # print(len(approx))
                # x, y, w, h = cv2.boundingRect(approx)
                # print('AREA', x, y, w, h)
                # placa = frameModificado[y:y + h, x:x + w]
                # textNew = pytesseract.image_to_string(placa, config='--psm 11')
                # print('PLACA: ', textNew)
                # textCycle = pytesseract.image_to_string([nuevoContorno], config='--psm 11')
                # print(textCycle)

        custom_oem_psm_config = r'--oem 3 --psm 6'
        # config = '--psm 11'
        d = pytesseract.image_to_data(frameModificado,
                                      output_type=Output.DICT,
                                      config=custom_oem_psm_config)
        #print(d.keys())
        print(d['text'])

        ## EXAMPLE
        text = pytesseract.image_to_string(frameModificado,
                                           config=custom_oem_psm_config)
        # print("Detected Number is:", text)

        x = 0
        y = 0
        n_boxes = len(d['text'])
        for i in range(n_boxes):
            if int(d['conf'][i]) > 60:
                (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i],
                                d['height'][i])

                frameModificado = cv2.rectangle(frameModificado, (x, y),
                                                (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(frameModificado, '{}'.format(d['text']), (x - 60, y + 160),
                    font, 1.00, (0, 0, 0), 2, cv2.LINE_AA)

        cv2.imshow('Frame', frame)
        cv2.imshow('Frame_Modificado', frameModificado)
        # cv2.imshow("Frame_Original", frameOriginal)

        # Exit with 'ESC'

        patronNew = re.compile('^FKI[0-9][0-9][0-9][0-9][0-9]')

        tempLength = d['text']

        print(len(d['text']))

        findText = None
        for t in d['text']:
            findText = re.search(patronNew, t, flags=0)
            # print("Iteration: " + t)
            if findText:
                print("Detected Number is:", findText.group(0))
                break

        # findText = re.search(patronNew, text, flags=0)
        # print(findText.group(0))
        if findText:
            ## print("Detected Number is:", findText.group(0))
            break

        k = cv2.waitKey(5) & 0xFF
        if k == 27:
            break

    # if we are not using a video file, stop the video file stream
    if not args.get("video", False):
        vs.stop()

    # otherwise, release the camera pointer
    else:
        vs.release()

    # close all windows
    cv2.destroyAllWindows()
    # waitTime = findText.group(0)
    # print('waitTime: ' + waitTime)
    return findText.group(0) if findText else None
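
A hypothetical entry point for the function above; camIdMotor() blocks until a plate matching ^FKI[0-9]{5} is read or ESC is pressed:

if __name__ == "__main__":
    codigo = camIdMotor()
    print('Detected code:', codigo)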
Example #4
# Assumes img and thresh1..thresh5 come from the five cv2.threshold() calls shown
# earlier (BINARY, BINARY_INV, TRUNC, TOZERO, TOZERO_INV on a grayscale image),
# with: import cv2 as cv, import matplotlib.pyplot as plt, import pytesseract,
# from pytesseract import Output
titles = [
    'Original Image', 'BINARY', 'BINARY_INV', 'TRUNC', 'TOZERO', 'TOZERO_INV'
]
images = [img, thresh1, thresh2, thresh3, thresh4, thresh5]

for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.imshow(images[i], 'gray', vmin=0, vmax=255)
    plt.title(titles[i])
    plt.xticks([]), plt.yticks([])

plt.show()

custom_config = r'--oem 3 --psm 6 -l kor+eng'
words = pytesseract.image_to_data(thresh4,
                                  config=custom_config,
                                  output_type=Output.DICT)  ## pytesseract

n_boxes = len(words['text'])
print(n_boxes)
for i in range(n_boxes):
    if int(words['conf'][i]) > 60:
        (x, y, w, h) = (words['left'][i], words['top'][i], words['width'][i],
                        words['height'][i])
        cv.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 1)
        cv.putText(img, words['text'][i], (x, y), cv.FONT_HERSHEY_COMPLEX, 0.5,
                   (0, 0, 0), 1)
        img_crop = img[y:y + h, x:x + w]
        cv.imshow('img_crop', img_crop)
        cv.imshow('Resource', img)
        cv.waitKey(0)

Example #5
    def __init__(self,
                 dir_to_find=None,
                 oem=None,
                 psm=None,
                 parent=None,
                 filter_image_selected=None):
        from os import scandir, path

        # App Message
        if parent is None:
            self.parent = self
            self.parent.info = self.info
        else:
            self.parent = parent

        #check exist and set path of EXE Tesseract
        exe_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
        if not path.exists(exe_tesseract):
            self.parent.info('File not exist: ' + exe_tesseract)
            return
        pytesseract.pytesseract.tesseract_cmd = exe_tesseract

        # Adding custom options
        if oem is not None and psm is not None:
            custom_config = r'--oem ' + str(oem) + ' --psm ' + str(psm)
        elif oem is not None:
            custom_config = r'--oem ' + str(oem)
        elif psm is not None:
            custom_config = r'--psm ' + str(psm)
        else:
            custom_config = r'--oem 3'

        # Select Photo Directory
        if dir_to_find is None:
            self.select_directory()
        else:
            self.dir_to_find = dir_to_find

        result = {}
        correct = 0
        incorrect = 0
        # Create TXT File with Results
        file_txt = open(path.join(self.dir_to_find, 'Result_OCR_Scan.txt'),
                        'w')
        file_txt.write('Configuration ' + custom_config + chr(10))
        file_txt.write('Filter ' + str(filter_image_selected) + chr(10) + chr(10))

        for file in scandir(self.dir_to_find):
            # extract name file and extension separate
            name, ext = path.splitext(file.name)

            # Find coordinates from a picture file
            if ext.upper() in ('.JPG', '.JPEG', '.PNG'):
                find = False
                incorrect += 1
                day = ' No Encontrado'
                hour = ''
                lat = ''
                lon = ''

                # Filter Image for a good reading
                self.filter_image(file.path, filter_image_selected)

                read = pytesseract.image_to_data(self.img,
                                                 output_type=Output.DICT)

                for i in range(len(read['text'])):
                    if read['text'][i] == 'GARMIN':
                        find = True
                        day = read['text'][i + 1]
                        hour = read['text'][i + 2]
                        lat = float(read['text'][i + 3])
                        lon = float(read['text'][i + 4])

                        self.geotag_photo(file.path, lat, lon)
                        incorrect -= 1
                        correct += 1
                        break

                result[file.path] = {
                    'day': day,
                    'hour': hour,
                    'lat': lat,
                    'lon': lon,
                    'find': find
                }
                text_result = file.path + " " + day + " " + hour + " " + str(
                    lat) + " " + str(lon)
                file_txt.write(text_result + chr(10))
                self.parent.info(text_result)

        #self.parent.info(result)
        self.parent.info('GeoLocalizados   : ' + str(correct))
        self.parent.info('No GeoLocalizados: ' + str(incorrect))

        file_txt.write('GeoLocalizados   : ' + str(correct) + chr(10))
        file_txt.write('No GeoLocalizados: ' + str(incorrect) + chr(10))
        file_txt.close()
Example #6
def imgtodata(i):
    j = pytesseract.image_to_data(i, output_type=Output.DICT)
    #print(j.keys())
    k = len(j['text'])
    return j, k
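
A hypothetical one-line usage of the helper above (the image path is illustrative and assumes import cv2):

d, n = imgtodata(cv2.imread('sample.png'))
print(n, 'entries;', [t for t in d['text'] if t.strip()][:5])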
Example #7
"""
Created on Tue Mar  3 16:32:52 2020

@author: Namita
Simple program to detect text and box it using pytesseract

Usage : python box.py
"""

import cv2
import pytesseract
from pytesseract import Output

img_cv = cv2.imread('image.jpg')  # read image using image path
img = cv2.cvtColor(
    img_cv, cv2.COLOR_BGR2RGB)  # pytesseract expects RGB; OpenCV loads BGR
d = pytesseract.image_to_data(
    img, output_type=Output.DICT)  #convert image to data object
# print(pytesseract.image_to_boxes(img))
# print(d['text'])
n_boxes = len(d['text'])
for i in range(n_boxes):
    if int(d['conf'][i]) > 60:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i],
                        d['height'][i])
        img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv2.imshow('IMG.png', img)  #display image
cv2.waitKey(0)  # 0 waits for a manual close; pass a value in ms to auto-close
Example #8
import cv2
import pytesseract
from numpy import asarray
#ap = argparse.ArgumentParser()
#p.add_argument("-i" , "--testID.jpg" , help = "path to input image to be OCR'd", default = "stdout")
#ap.add_argument ("-o" , "--output" , help = "path to the output CSV file")
#ap.add_argument("-c" , "--min conf" , help = "minimum confidence value to filter weak text detection")
#ap.add_argument("-d" , "--dist-thresh" , type = float , default = 25.0)
#ap.add_argument("-s", "--min-size", type = int, default = 2 , help = "minimum cluster size")

#args = vars(ap.parse_args())
#np.random.seed(42)
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
custom_config = r' --oem 3 --psm 11'
images = cv2.imread(r"C:\Users\King alagbe\Downloads\testID.jpg")
images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB)
height, width, t = images.shape
boxes = pytesseract.image_to_data(images , config= custom_config)

print(boxes)
for x, b in enumerate(boxes.splitlines()):
    if x != 0:  # skip the TSV header row
        # print(b)
        b = b.split()
        print(b)
        if len(b) == 12:  # rows with 12 fields contain recognized text
            x, y, w, h = int(b[6]), int(b[7]), int(b[8]), int(b[9])
            cv2.rectangle(images, (x, y), (w + x, h + y), (225, 0, 0), 2)
            cv2.putText(images, b[11], (x, y), cv2.FONT_HERSHEY_COMPLEX, 0.3,
                        (50, 50, 255), 1)

cv2.imshow("Result", images)
cv2.waitKey(0)
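
As an aside, the raw TSV string returned by image_to_data above can also be parsed with the csv module instead of manual splitting (a sketch reusing the boxes variable):

import csv
import io

for row in csv.DictReader(io.StringIO(boxes), delimiter='\t',
                          quoting=csv.QUOTE_NONE):
    if row['text'].strip():
        print(row['text'], row['left'], row['top'], row['width'], row['height'])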
Example #9
        # Assumes module-level: windows, windowRects, index, a click() helper,
        # plus imports for keyboard, PIL's ImageGrab, numpy as np, cv2,
        # pytesseract and time.
        def bot():
            # get window and rect
            window = windows[index]
            rect = windowRects[index]
            #separate x, y, w, h of rect
            x = rect[0]
            y = rect[1]
            w = rect[2]
            h = rect[3]
            #create bounding box: top and height are cut
            bounding_box = (x, y + 100, w, h - 160)

            waittime = 0

            try:
                while True:
                    if (keyboard.is_pressed("q")):
                        break
                    windowimg = ImageGrab.grab(bbox=bounding_box)
                    #convert to mode 'L', i.e. grayscale
                    windowimg = windowimg.convert('L')
                    #convert image to numpy array to work with pixels
                    img = np.array(windowimg)

                    # add threshold: just black and white
                    ret, img = cv2.threshold(img, 240, 255, cv2.THRESH_BINARY)

                    d = pytesseract.image_to_data(img,
                                                  output_type='data.frame')

                    cv2.imshow('screen', img)
                    if (cv2.waitKey(1) & 0xFF) == ord('q'):
                        cv2.destroyAllWindows()
                        break
                    # filter the results of tesseract for flyff
                    # these filters aren't perfect yet
                    d = d[d['text'].apply(lambda x: isinstance(x, str))]
                    d = d[(d['conf'] != -1) & (d['conf'] > 2) &
                          (d['text'] != "") & (d['text'] != " ") &
                          (d['text'] != "NaN") &
                          (d['text'].apply(lambda x: len(x) > 2))]

                    # text in image is found
                    if (d.shape[0] > 0):
                        # get row of found text
                        row = d.iloc[0]
                        # + 20 because x + row[left] is the x-top left corner of text
                        monsterx = x + row['left'] + 20
                        # + 5 because y + row[top] is the y-top left corner of text
                        # + 100 because we did + 100 at bounding box earlier
                        monstery = y + row['top'] + 100 + 5
                        print("Found Monster in Window: " + str(window) +
                              " at x: " + str(monsterx) + " at y: " +
                              str(monstery))
                        #click
                        click(monsterx, monstery)
                        #spellcast
                        keyboard.press_and_release("c")

                        # general wait time for next analysis of image
                        time.sleep(0.1)

            except KeyboardInterrupt:
                exit()
Example #10
import pyautogui
import pydirectinput
import pytesseract


def inventory_item_full_info(slot_i, slot_pos, slot_size, ss_path):
    """
    Returns a tuple follow this format : (slot_i, item_name, amount) (int, str, int)
    It uses the item_amount function

    :param slot_i: <int> Slot index
    :param slot_pos: <tuple> Format (X, Y)
    :param slot_size: <int> Slot size (needed for item_amount screenshots)
    :param ss_path: <str> Path to save screenshots (it spams them) Example : "./screenshots/"
    """
    max_length = 400
    slot_info = (slot_i, "null", "null")  # won't be changed if no items detected

    pydirectinput.moveTo(slot_pos[0], slot_pos[1])
    screenshot = pyautogui.screenshot(f"{ss_path}slot_{slot_i}.png",\
         region=(slot_pos[0]+17,slot_pos[1]-30, max_length, 24))#Takes Screenshot

    pixel = screenshot.getpixel(
        (1, 0))  #First top left pixel indicates if we found an item
    # item names are surrounded in a pink/black box, that we use to detect if one found

    if (25 <= pixel[0] <= 45) and (0 <= pixel[1] <= 15) and (80 <= pixel[2] <=
                                                             100):
        # More or less close to (35, 0, 90) that we're looking for
        # If entered here, an item has been detected

        width = max_length - 1
        while width >= 0:
            new_px = screenshot.getpixel((width, 0))
            if (25 <= new_px[0] <= 45) and (0 <= new_px[1] <=
                                            15) and (80 <= new_px[2] <= 100):
                # Means we reached the box around item name, no extra space on the right anymore
                break
            width -= 10
        print(width)

        screenshot = pyautogui.screenshot(f"{ss_path}slot_{slot_i}.png",\
             region=(slot_pos[0]+17,slot_pos[1]-30, width, 24))#New updated screenshot
        # Better OCR as a result of cropping the image

        content = pytesseract.image_to_data(screenshot)
        # image_to_data separates all text found in words

        item_name = ''

        for x, bx in enumerate(
                content.splitlines()):  # Each line holds different data

            if x != 0:  # First line is a different format and holds no data we need
                bx = bx.split()

                if len(bx) == 12:  # len is 12 when a word is found

                    item_name += f'{bx[11]} '  # word held at index 11 (last one)

        pyautogui.moveTo(
            64, 103)  #Outside of inventory to not block item screenshot

        amount = item_amount(slot_pos,
                             slot_size,
                             f'slot_{slot_i}_amount.png',
                             ss_path=ss_path)
        # Calls item_amount to get the amount on this slot

        slot_info = (slot_i, item_name, amount)

    return slot_info
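
A hypothetical call to the function above (coordinates and path are illustrative, and the item_amount helper must exist as the docstring notes):

info = inventory_item_full_info(0, (760, 420), 64, "./screenshots/")
print(info)  # (0, 'null', 'null') when the slot is empty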
Example #11
import mysql.connector
import sys
from gtts import gTTS
from PIL import Image
from pytesseract import image_to_string
from pytesseract import image_to_boxes
from pytesseract import image_to_osd
from pytesseract import image_to_data
from pytesseract import image_to_pdf_or_hocr
#from pytesseract import osd_to_dict
#from pytesseract import run_tesseract
#from pytesseract import run_and_get_output

img = Image.open("/home/san/aaa/example_03.png")
text = image_to_data(img)
print(text)

text = image_to_boxes(img)
print(text)

#======================================

# db= mysql.connector.connect(user="******",password="******",host="localhost",database="dsaa")

# mycursor = db.cursor()

# print """
# 	+=========================================+
# 	|    Please select  page no. of Book	  |
# 	+=========================================+
# 	  """
Example #12
import cv2
import numpy as np
import pytesseract


def processImage():
    readImage = cv2.imread('screenshot.png')
    imageHeight, imageWidth = readImage.shape[:2]

    lowerWidthBound = int(.29 * imageWidth)  # 555px / 1920px
    upperWidthBound = int(.375 * imageWidth)  # 720px / 1920px
    lowerHeightBound = int(.435 * imageHeight)  # 470px / 1080px
    upperHeightBound = int(.57 * imageHeight)  # 615px / 1080px

    # Crop and block out colors on the image
    croppedImage = readImage[lowerHeightBound:upperHeightBound,
                             lowerWidthBound:upperWidthBound]

    lowerColorBound = np.array([27, 27, 27], dtype="uint16")
    upperColorBound = np.array([50, 50, 50], dtype="uint16")
    colorMask = cv2.inRange(croppedImage, lowerColorBound, upperColorBound)

    # Find text contours
    imageContours, imageHierarchy = cv2.findContours(colorMask,
                                                     cv2.RETR_EXTERNAL,
                                                     cv2.CHAIN_APPROX_SIMPLE)

    # Create lists to hold our Tesseract data
    possibleChars = []
    possibleCharsData = []

    # Process everything that may be text
    for possibleText in imageContours:
        # Create a bounding rectangle around the contour
        [x, y, width, height] = cv2.boundingRect(possibleText)

        # Draw rectangles around the contours on the original image
        cv2.rectangle(colorMask, (x, y), (x + width, y + height),
                      (255, 0, 255), 0)
        # Create our region of interest
        regionOfInterest = colorMask[y:(y + height), x:(x + width)]

        # Enlarge the region
        enlargedImage = cv2.copyMakeBorder(regionOfInterest,
                                           10,
                                           10,
                                           10,
                                           10,
                                           cv2.BORDER_CONSTANT,
                                           value=[255, 255, 255])

        # Run Tesseract OCR against the enlarged region (the bordered copy
        # built above; the original snippet passed regionOfInterest, leaving
        # enlargedImage unused) to try and find the most likely keystroke
        possibleChars.append(
            pytesseract.image_to_string(
                enlargedImage,
                lang='eng',
                config=
                '--psm 10 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz'
            ))
        possibleCharsData.append(
            pytesseract.image_to_data(
                enlargedImage,
                lang='eng',
                config=
                '--psm 10 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz'
            ))

    # Return all of the possible characters for the AFK key
    return possibleChars, possibleCharsData
Example #13
import pytesseract
from PIL import Image
import cv2

print(pytesseract.image_to_data(Image.open('./img/np1.jpeg')))
print(pytesseract.image_to_data(Image.open('./img/np2.jpeg')))
img = cv2.imread('./img/np1.jpeg')
print(pytesseract.image_to_string(img))
Example #14
    # Fragment: assumes rect, width, height, centre, carsample_mask and the
    # cropped_images list come from earlier plate-detection steps, plus a
    # showfig() helper and imports for cv2, numpy as np,
    # matplotlib.pyplot as plt, pytesseract and PIL's Image.
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    #check for 90 degrees rotation
    if ((width / float(height)) > 1):
        # crop a particular rectangle from the source image
        cropped_image = cv2.getRectSubPix(carsample_mask, (width, height),
                                          centre)
    else:
        # crop a particular rectangle from the source image
        cropped_image = cv2.getRectSubPix(carsample_mask, (height, width),
                                          centre)

    # convert into grayscale
    cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
    # equalize the histogram
    cropped_image = cv2.equalizeHist(cropped_image)
    # resize to 260 cols and 180 rows (just something set as standard here)
    cropped_image = cv2.resize(cropped_image, (260, 180))
    cropped_images.append(cropped_image)

_ = plt.subplots_adjust(hspace=0.000)
number_of_subplots = len(cropped_images)
for i, v in enumerate(range(number_of_subplots)):
    v = v + 1
    #ax1 = plt.subplot(number_of_subplots,1,v)
    showfig(cropped_images[i], plt.get_cmap('gray'))
    plt.show()
for img in cropped_images:
    data = pytesseract.image_to_data(Image.fromarray(img), output_type='dict')
    print(data)
Example #15
def ocr_img(img: np.ndarray,
            input_file: str,
            search_str: str,
            highlight_readable_text: bool = False,
            action: str = 'Highlight',
            show_comparison: bool = False,
            generate_output: bool = True):
    """Scans an image buffer or an image file.
    Pre-processes the image.
    Calls the Tesseract engine with pre-defined parameters.
    Calculates the confidence score of the image grabbed content.
    Draws a green rectangle around readable text items having a confidence score > 30.
    Searches for a specific text.
    Highlights or redacts found matches of the searched text.
    Displays a window showing readable text fields or the highlighted or redacted text.
    Generates the text content of the image.
    Prints a summary to the console."""
    # If image source file is inputted as a parameter
    if input_file:
        # Reading image using opencv
        img = cv2.imread(input_file)
    # Preserve a copy of this image for comparison purposes
    initial_img = img.copy()
    highlighted_img = img.copy()
    # Convert image to binary
    bin_img = convert_img2bin(img)
    # Calling Tesseract
    # Tesseract Configuration parameters
    # oem --> OCR engine mode = 3 >> Default, based on what is available (LSTM neural net mode works the best)
    # psm --> page segmentation mode = 6 >> Assume a single uniform block of text (how a page of text can be analyzed)
    config_param = r'--oem 3 --psm 6'
    # Feeding image to tesseract
    details = pytesseract.image_to_data(bin_img,
                                        output_type=Output.DICT,
                                        config=config_param,
                                        lang='eng')
    # The details dictionary contains the information of the input image
    # such as detected text, region, position, information, height, width, confidence score.
    ss_confidence = calculate_ss_confidence(details)
    boxed_img = None
    # Total readable items
    ss_readable_items = 0
    # Total matches found
    ss_matches = 0
    for seq in range(len(details['text'])):
        # Consider only text fields with confidence score > 30 (text is readable)
        if float(details['conf'][seq]) > 30.0:
            ss_readable_items += 1
            # Draws a green rectangle around readable text items having a confidence score > 30
            if highlight_readable_text:
                (x, y, w, h) = (details['left'][seq], details['top'][seq],
                                details['width'][seq], details['height'][seq])
                boxed_img = cv2.rectangle(img, (x, y), (x + w, y + h),
                                          (0, 255, 0), 2)
            # Searches for the string
            if search_str:
                results = re.findall(search_str, details['text'][seq],
                                     re.IGNORECASE)
                for result in results:
                    ss_matches += 1
                    if action:
                        # Draw a red rectangle around the searchable text
                        (x, y, w,
                         h) = (details['left'][seq], details['top'][seq],
                               details['width'][seq], details['height'][seq])
                        # Details of the rectangle
                        # Starting coordinate representing the top left corner of the rectangle
                        start_point = (x, y)
                        # Ending coordinate representing the bottom right corner of the rectangle
                        end_point = (x + w, y + h)
                        # Color in BGR -- Blue, Green, Red
                        if action == "Highlight":
                            color = (0, 255, 255)  # Yellow
                        elif action == "Redact":
                            color = (0, 0, 0)  # Black
                        # Thickness in px (-1 will fill the entire shape)
                        thickness = -1
                        boxed_img = cv2.rectangle(img, start_point, end_point,
                                                  color, thickness)

    if ss_readable_items > 0 and highlight_readable_text and not (
            ss_matches > 0 and action in ("Highlight", "Redact")):
        highlighted_img = boxed_img.copy()
    # Highlight found matches of the search string
    if ss_matches > 0 and action == "Highlight":
        cv2.addWeighted(boxed_img, 0.4, highlighted_img, 1 - 0.4, 0,
                        highlighted_img)
    # Redact found matches of the search string
    elif ss_matches > 0 and action == "Redact":
        highlighted_img = boxed_img.copy()
        # cv2.addWeighted(boxed_img, 1, highlighted_img, 0, 0, highlighted_img)
    # save the image
    cv2.imwrite("highlighted-text-image.jpg", highlighted_img)
    # Displays window showing readable text fields or the highlighted or redacted data
    if show_comparison and (highlight_readable_text or action):
        title = input_file if input_file else 'Compare'
        conc_img = cv2.hconcat([initial_img, highlighted_img])
        display_img(title, conc_img)
    # Generates the text content of the image
    output_data = None
    if generate_output and details:
        output_data = generate_ss_text(details)
    # Prints a summary to the console
    if input_file:
        summary = {
            "File": input_file,
            "Total readable words": ss_readable_items,
            "Total matches": ss_matches,
            "Confidence score": ss_confidence
        }
        # Printing Summary
        print(
            "## Summary ########################################################"
        )
        print("\n".join("{}:{}".format(i, j) for i, j in summary.items()))
        print(
            "###################################################################"
        )
    return highlighted_img, ss_readable_items, ss_matches, ss_confidence, output_data
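
A hypothetical invocation of ocr_img; it assumes the helpers it references (convert_img2bin, calculate_ss_confidence, generate_ss_text, display_img) are defined elsewhere in the module:

result = ocr_img(img=None, input_file='scan.jpg', search_str='total',
                 highlight_readable_text=True, action='Highlight',
                 show_comparison=False, generate_output=True)
highlighted, readable, matches, confidence, text = result
print(readable, 'readable words,', matches, 'matches')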
Example #16
    def ocr(self, psm: int = 4, oem: int = 1):
        config = '--psm {} --oem {}'.format(psm, oem)
        ocr_df = pytesseract.image_to_data(self.img,
                                           config=config,
                                           output_type=Output.DATAFRAME)
        return TesseractOcrOperand(self, ocr_df)
Example #17

def charsearchdata(what=49, graphic_enable=graphic_on):
    data = pytesseract.image_to_data(gray, config='--psm 13')
    print(data)
Example #18
ap.add_argument("-i",
                "--image",
                required=True,
                help="path to input image to be OCR'd")
ap.add_argument("-c",
                "--min-conf",
                type=int,
                default=0,
                help="mininum confidence value to filter weak text detection")
args = vars(ap.parse_args())

# load the input image, convert it from BGR to RGB channel ordering,
# and use Tesseract to localize each area of text in the input image
image = cv2.imread(args["image"])
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = pytesseract.image_to_data(rgb, output_type=Output.DICT, lang="jpn")

# loop over each of the individual text localizations
for i in range(0, len(results["text"])):
    # extract the bounding box coordinates of the text region from
    # the current result
    x = results["left"][i]
    y = results["top"][i]
    w = results["width"][i]
    h = results["height"][i]

    # extract the OCR text itself along with the confidence of the
    # text localization
    text = results["text"][i]
    conf = int(results["conf"][i])
Example #19
            return ''.join(cuit.groups())


#endregion

#region - "MAIN"
if __name__ == "__main__":

    obtener_puntos()
    niveles = sorted(niveles)
    lineas = obtener_lineas(puntos, niveles)
    obtener_secciones(lineas)

    for sec in range(len(secciones)):
        key = 'Seccion-' + str(sec + 1)
        boxes = pytesseract.image_to_data(secciones[sec])
        ocr_secciones[key] = limpiar_boxes(boxes)
        '''cv2.imshow("Seccion", secciones[sec])
		cv2.waitKey(0)'''

    print(ocr_secciones)
    cv2.destroyAllWindows()

#endregion
#region - WORD PROCESSING

nombre_obtenido = False
for key in ocr_secciones:
    if len(ocr_secciones[key]) > 3 and (not nombre_obtenido):
        #print(ocr_secciones[key])
        razon_social = obtener_razon_social(ocr_secciones[key])
Example #20
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = "C:/Program Files (x86)/Tesseract-OCR/tesseract.exe"
img = cv2.imread('1.jpg')
img1 = img.copy()
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# print(pytesseract.image_to_string(img))
# for image to boxes
imx, imy, imc = img.shape
print(imx)
boundary = pytesseract.image_to_boxes(img)
for b in boundary.splitlines():
    b = b.split(' ')
    # image_to_boxes reports coordinates with the origin at the bottom-left,
    # hence the y-flip against the image height imx
    x, y, w, h = int(b[1]), int(b[2]), int(b[3]), int(b[4])
    cv2.rectangle(img, (x, imx - y), (w, imx - h), (255, 0, 0), 1)
    cv2.putText(img, b[0], (x, imx - y - 20), cv2.FONT_HERSHEY_DUPLEX, 0.34,
                (0, 0, 0), 1)
    cv2.imshow('IMAGE TO LETTERS', img)

# image to data
boundary1 = pytesseract.image_to_data(img1)
for c, b1 in enumerate(boundary1.splitlines()):
    if c != 0:  # skip the TSV header row
        b1 = b1.split()
        if len(b1) == 12:
            x1, y1, w1, h1 = int(b1[6]), int(b1[7]), int(b1[8]), int(b1[9])
            cv2.rectangle(img1, (x1, y1), (w1 + x1, h1 + y1), (255, 0, 0), 1)
            cv2.putText(img1, b1[11], (x1, y1), cv2.FONT_HERSHEY_DUPLEX, 0.34,
                        (0, 0, 0), 1)
            cv2.imshow('Image TO DATA', img1)
cv2.waitKey(0)

Example #21
# Assumes: import cv2, json, pytesseract; from pytesseract import Output;
# and that img and dilation were produced by earlier preprocessing steps.
contours, hierarchy = cv2.findContours(
    dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

im2 = img.copy()

file = open("recognized.json", "w+")
file.write("[")
file.close()

for idx, cnt in enumerate(contours):
    x, y, w, h = cv2.boundingRect(cnt)

    rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)

    cropped = im2[y:y + h, x:x + w]

    file = open("recognized.json", "a")

    print("[INFO] reading text...")
    text = pytesseract.image_to_data(cropped, output_type=Output.DICT)

    print("[INFO] writing text...")
    file.write(json.dumps(text))
    if idx != len(contours) - 1:
        file.write(",")
    else:
        file.write("]")
    # print(text)

    file.close()
print("[Finished] Data written to the file .\\recognised.json")
Example #22
def grid_search_adaptive_binarization(
        img_path,
        cv_adaptive_sizes=[15, 25],
        cv_adaptive_cs=[10],
        cv_adaptive_methods=['ADAPTIVE_THRESH_MEAN_C'],
        output=True,
        eval_method='mlc',
        ground_truth_path=None):
    '''
    Try each size and C value to find the best adaptive binarization params.
    eval_method = [mlc, wer] (mean line confidence or word error rate)
    Returns the list of grid-search results plus the best index and params.
    '''

    if eval_method not in ['mlc', 'wer']:
        print('error: invalid eval_method')
        return False

    if eval_method == 'wer' and ground_truth_path is not None:
        # load the reference transcription
        try:
            ground_truth = open(ground_truth_path, 'r').read()
        except OSError:
            print('error: cannot load ground truth from .txt')
            return False

    if eval_method == 'wer' and ground_truth_path is None:
        print(
            'error: please provide a ground truth text if evaluation method == wer'
        )
        return False

    import time
    import numpy as np
    print_time_once = True
    gridsearch_result = []

    for method in cv_adaptive_methods:
        for c in cv_adaptive_cs:
            for size in cv_adaptive_sizes:
                start_time = time.time()

                # apply treshold
                img_bin_adapt = adaptive_binary(img_path, size, c, method)

                # apply ocr
                df_data = pytesseract.image_to_data(img_bin_adapt,
                                                    output_type='data.frame')

                # measurement
                if eval_method == 'mlc':
                    try:
                        length, text, measure = image_to_data_stats(
                            df_data, output=False)
                    except Exception as e:
                        print('error: mean line confidence', e)
                        measure = np.nan
                        length = np.nan

                elif eval_method == 'wer':
                    try:
                        hypothesis = ' '.join(
                            df_data[df_data['text'].notnull()].text.to_list())
                        length, _, _ = image_to_data_stats(df_data,
                                                           output=False)
                        measure = word_error_rate(ground_truth, hypothesis)
                    except Exception:
                        measure = np.nan
                        length = np.nan

                # add gs data
                gridsearch_result.append({
                    'length': length,
                    eval_method: measure,
                    'size': size,
                    'c': c,
                    'method': method
                })

                # time one iteration to estimate the total runtime
                time_end = round((time.time() - start_time), 2)

                if print_time_once:
                    cv_iterations = len(cv_adaptive_sizes) * len(
                        cv_adaptive_cs) * len(cv_adaptive_methods)
                    print('eval_method:', eval_method, ' nr. of iterations:',
                          cv_iterations, 'est. time (min):',
                          round(cv_iterations * time_end / 60, 2))
                    print_time_once = False

                if output:
                    print('GS>', 'method', method, 'size', size, 'c', c,
                          eval_method, measure, '\t time:', time_end)
                    plot_img(img_bin_adapt, 150)
                    plt.pause(0.05)
    '''return dict and best index with best params'''
    if eval_method == 'mlc':
        best_idx, best_params = max(enumerate(gridsearch_result),
                                    key=lambda item: item[1][eval_method])

    elif eval_method == 'wer':
        best_idx, best_params = min(enumerate(gridsearch_result),
                                    key=lambda item: item[1][eval_method])

    return gridsearch_result, best_idx, best_params
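
A hypothetical call to the grid search above (assuming adaptive_binary, image_to_data_stats, word_error_rate and plot_img are defined elsewhere):

results, best_idx, best_params = grid_search_adaptive_binarization(
    'page.png',
    cv_adaptive_sizes=[15, 25, 35],
    cv_adaptive_cs=[5, 10],
    output=False,
    eval_method='mlc')
print('best:', best_params)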
Example #23
def img_to_data(img):
    return pytesseract.image_to_data(img, output_type = pytesseract.Output.DICT)
Example #24
def get_data(im):
    return pytesseract.image_to_data(im, output_type=Output.DICT)
Example #25
# Assumes: import sys; import cv2 as cv; and that root_path and filename are set
img = cv.imread(root_path + filename)

if img is None:
    print("Error opening video stream or file")
    sys.exit()

img_thresholding = img.copy()
import pytesseract
from pytesseract import Output

# custom_config = r'--oem 3 --psm 6'
custom_config = r'--oem 3 --psm 6 -l kor+kor_vert+eng'  # korean + english

words_string = pytesseract.image_to_string(img)
words = pytesseract.image_to_data(img,
                                  config=custom_config,
                                  output_type=Output.DICT)
print(words.keys())
n_boxes = len(words['text'])
for i in range(n_boxes):
    if int(words['conf'][i]) > 60:
        (x, y, w, h) = (words['left'][i], words['top'][i], words['width'][i],
                        words['height'][i])
        img = cv.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv.imshow('Resource', img)
cv.waitKey(0)


#thresholding
def thresholding(image):
    return cv.threshold(image, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)[1]
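A short usage sketch for the helper above; Otsu thresholding expects a single-channel 8-bit image, so convert to grayscale first:

gray = cv.cvtColor(img_thresholding, cv.COLOR_BGR2GRAY)
binarized = thresholding(gray)
ocr_result = pytesseract.image_to_data(binarized, config=custom_config,
                                       output_type=Output.DICT)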
Example n. 26
# Body of a frame-reading loop; the cv2.VideoCapture setup precedes this
# excerpt (a hypothetical version is sketched below the loop).
while success:
    if count > 1000:
        break
    # cv2.imwrite("frame%d.jpg" % count, image)  # save frame as JPEG file
    success, image = vidcap.read()
    # print('Read a new frame: ', success)
    if success:
        count += 1
        if count > (13 * 30) and count % 50 == 0:
            try:
                # crop the region where the "REPLAY" banner appears
                img = image[650:702, 40:155]
                img = get_grayscale(img)
                # Show analysed image
                # cv2.imshow('img', img)
                # cv2.waitKey(0)
                d = pytesseract.image_to_data(img,
                                              lang="eng",
                                              config=custom_config,
                                              output_type=Output.DICT)
                print(d['text'])
                n_boxes = len(d['text'])
                for i in range(n_boxes):
                    # partial matches guard against OCR misreads of "REPLAY"
                    matches = ["REPLAY", "REPL", "REP", "PLAY", "EPL", "PLA"]
                    if any(x in d['text'][i] for x in matches):
                        print("Goal detected at frame: {}".format(count))
                        goalDetected = True
                        videosWithConfirmedGoal += 1
                        detectedGoalList.append([v, count])
                        # Overlay bounding boxes
                        # if int(d['conf'][i]) > 60:
            except Exception as e:
                # the except clause was truncated in the source excerpt
                print('OCR failed on frame {}: {}'.format(count, e))
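The loop above assumes several names already exist; here is a minimal, hypothetical setup (the video path, the `v` identifier, and the `get_grayscale` helper are placeholders, not from the source):

import cv2
import pytesseract
from pytesseract import Output

def get_grayscale(frame):
    # assumed helper: convert a BGR frame to grayscale for OCR
    return cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

v = 'match.mp4'                      # placeholder video identifier
vidcap = cv2.VideoCapture(v)         # open the video to scan
success, image = vidcap.read()       # prime the loop
count = 0
custom_config = r'--oem 3 --psm 6'   # assumed tesseract options
goalDetected = False
videosWithConfirmedGoal = 0
detectedGoalList = []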
Example n. 27
# (this snippet assumes: import os, pandas, pytesseract;
#  from PIL import Image; from io import StringIO; from numpy import random)
def tsvOCR(img, savpath, tsvfile, p=1, append=True, psm=1, oem=3):
    """
    
    Run OCR on an image and produce a TSV file along with text. Can limit which
    words are added to the TSV by way of binomial sampling.
    
    Arguments
    --------------------------------------------------------------------------    
    img (str, PIL image) : Either the file path of an image or a PIL image object
    
    savpath (str)        : The file path to save the text output.
    
    tsvfile (str)        : Name of a TSV file that will be produced in the same
                           directory as the text output.

    p (float)            : Probability to use for binomial sampling. If 1, no
                           sample is taken.

    append (boolean)     : If true, text output is appended to the file specified 
                           by savpath. If the file does not exist, it will be created.
                           Note: this argument also applies to the TSV output if a
                           TSV file is specified.
    
    psm (int)            : Tesseract configuration for Page Segmentation Mode. 
                          https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc
                         
    oem (int)            : Tesseract configuration for OCR Engine mode. 
                          https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc 
    
    Returns
    --------------------------------------------------------------------------
    none
    
    """

    #if a filename is used for the image, load the image
    if isinstance(img, str):
        name = os.path.split(img)[1]
        img = Image.open(img)
    else:
        # assumes the caller stashed a file name in the PIL image's info dict
        name = img.info["name"]

    # Set up tesseract config
    if 0 <= psm <= 13:
        pconfig = "--psm " + str(psm)
    else:
        raise ValueError("Invalid tesseract config: psm must be in 0-13.")

    if 0 <= oem <= 3:
        oconfig = "--oem " + str(oem)
    else:
        raise ValueError("Invalid tesseract config: oem must be in 0-3.")

    tconfig = pconfig + " " + oconfig

    #determine mode to save files
    if append:
        mode = {"mode": "a"}
    else:
        mode = {"mode": "w"}

    #Get TSV data
    tsvs = pytesseract.image_to_data(img, config=tconfig)
    tdf = pandas.read_csv(StringIO(tsvs),
                          sep="\t",
                          engine="python",
                          on_bad_lines="skip")  # error_bad_lines was removed in pandas 2.0

    #Turn TSV data into text
    par = 0
    line = 0
    text = ""

    for row in tdf.itertuples():
        token = str(row.text)

        #adjust tokens
        if token == "nan":
            continue
        if row.par_num != par:
            par = row.par_num
            line = row.line_num
            token = "\n\n" + token + " "
        elif row.line_num != line:
            line = row.line_num
            token = "\n" + token + " "
        else:
            token = token + " "

        #compile text
        text = text + token

    #write text to file
    with open(savpath, **mode) as ocrf:
        ocrf.write(text)

    #Drop TSV columns we don't need
    tdf = tdf.drop(columns=[
        "level", "page_num", "block_num", "par_num", "line_num", "word_num"
    ])

    #Sample TSV rows ('random' here is numpy's random module, see above)
    if p != 1:
        samp = random.binomial(size=len(tdf), n=1, p=p)

        #drop every row whose binomial draw came up 0
        for r, s in enumerate(samp):
            if s == 0:
                tdf = tdf.drop(r)

    #if we aren't left with an empty data frame
    if len(tdf) != 0:

        #add a column for the image name
        tdf["name"] = name

        #determine if a header is needed
        dirpath = os.path.split(savpath)[0]
        if append and os.path.exists(
                os.path.normpath(os.path.join(dirpath, tsvfile))):
            header = False
        else:
            header = True

        #write data to TSV file
        with open(os.path.normpath(os.path.join(dirpath, tsvfile)),
                  **mode) as tsvf:
            tdf.to_csv(tsvf, index=False, sep="\t", header=header)

    return
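A hedged usage sketch for tsvOCR; the paths, file names, and sampling rate below are placeholders, not from the source:

# OCR a scanned page, append its text to a shared corpus file, and sample
# roughly 30% of the recognised words into the accompanying TSV
tsvOCR("scans/page-001.png", "out/corpus.txt", "corpus.tsv",
       p=0.3, append=True, psm=1, oem=3)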
Example n. 28
def getPdfTextData():
    # (this snippet assumes: import os, cv2, PIL, pytesseract, numpy as np,
    #  pandas as pd; from wand.image import Image)
    # Scan the current directory for the .pdf file that will be converted
    filenames = []
    for file_ in os.scandir(os.getcwd()):
        if file_.name.endswith('.pdf'):
            filenames.append(file_.name)

    # Load the .pdf file and save as .png
    DPI = 600
    pdf = Image(filename=filenames[0], resolution=DPI)
    png_files = []
    for page_num, page in enumerate(pdf.sequence):
        page = Image(page)
        page = page.convert('png')
        temp_file_name = 'temp_' + str(page_num) + '.png'
        png_files.append(temp_file_name)
        page.save(filename=temp_file_name)

    # Load the .png files and combine to make one image
    img_arrays = []
    #if len(png_files) > 1:
    for file_ in (png_files):
        # open the first image file
        img = cv2.imread(file_)
        sz = img.shape
        # start_row/end_row support the (currently disabled) 5% top/bottom crop
        start_row = round(0.05 * sz[0])
        end_row = sz[0] - start_row
        # convert the image to numpy array
        img_arrays.append(np.array(img))  #[start_row:end_row,:,:]))
        # delete the temporary png file that we just read
        os.remove(file_)
    final_img_arr = np.vstack(img_arrays)

    img = PIL.Image.fromarray(final_img_arr).convert('L')
    threshold = 200
    # binarize: pixels above the threshold become 255, everything else 0
    img = img.point(lambda p: 255 if p > threshold else 0)
    img.show()

    # this is where we will temporarily store the extracted text
    tsv_filename = "text.tsv"
    # Get TSV information about the text and write the temp file
    # --psm 3, --psm 6, or --psm 11 all work well here
    custom_oem_psm_config = r'--psm 3'
    text = pytesseract.image_to_data(img, config=custom_oem_psm_config)
    file_object = open(tsv_filename, 'w')
    file_object.write(text)
    file_object.close()

    # Read the TSV file into a pandas dataframe
    csv_table = pd.read_table(tsv_filename, sep='\t')
    # delete the temporary TSV file now that the data is in pandas
    os.remove(tsv_filename)
    # Drop the rows with NaN values
    csv_table = csv_table.dropna()
    csv_table = csv_table.reset_index(drop=True)

    # Clean the image

    # DOCUMENT TYPE 1
    #Physician Referral - Print View                            https://prs.choosebroadspire.com/Template.aspx?PRSId=20191115... (Top)
    #1 of 4                                                     11/15/19, 2:22 PM  (Always on Bottom)

    # DOCUMENT TYPE 2
    #11/8/2019                                                  Physician Referral - Print View (Always on top)
    #https://prs.choosebroadspire.com/Template.a...             1/2 (Always on bottom)

    # Remove timestamps, datestamps, and website stamps
    # Find and drop the lines of text that match regular expressions like the
    # ones below (a standalone sketch of this regex cleanup follows the function):
    #   - reg_expr = 'http://%'
    #   - reg_expr = '[0-1][0-9]/[0-3][0-9]/[0-9][0-9], [0-12]+:[0-5][0-9] [AP]M'

    print(csv_table)
    # if document is of type 1
    if csv_table.text.values[0] == 'Physician':
        cond_line1 = csv_table.text.str.contains('https://.*', regex=True)
        cond_line2 = csv_table.text.str.contains(
            '[0-9]{1,2}/[0-9]{1,2}/[0-9]{1,2},', regex=True)

        line1_tops = csv_table[cond_line1].top.values
        line1_heights = csv_table[cond_line1].height.values

        line2_tops = csv_table[cond_line2].top.values
        line2_heights = csv_table[cond_line2].height.values

        # now that we have the tops of the bounding boxes for each line,
        # let's search through all other entries in the data frame that
        # have tops in a small interval around the tops of the lines of interest
        for top1, top2, height1, height2 in zip(line1_tops, line2_tops,
                                                line1_heights, line2_heights):
            cond11 = csv_table['top'].ge(top1 - height1 * 0.7)
            cond12 = csv_table['top'].le(top1 + height1 * 0.7)
            line1_table = csv_table[cond11 & cond12]

            #get the indices for the table above and drop them from the final table
            cond21 = csv_table['top'].ge(top2 - height2 * 0.7)
            cond22 = csv_table['top'].le(top2 + height2 * 0.7)
            line2_table = csv_table[cond21 & cond22]

            line1_indices = line1_table.index.values
            line2_indices = line2_table.index.values
            csv_table = csv_table.drop(index=line1_indices)
            csv_table = csv_table.drop(index=line2_indices)

            csv_table = csv_table.reset_index(drop=True)

    # if document is of type 2
    if csv_table.head(1).text.str.contains('[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}',
                                           regex=True).values[0]:
        # Because documents of type 2 have header lines containing dates that we
        # want to remove, we need to be careful not to delete other lines within
        # the document that contain dates. To do this, we find all strings that
        # have the date form and filter them by adding the constraint that the
        # next index in the table should be "Physician Referral".

        # get a table with all matches for dates
        dates_cond = csv_table.text.str.contains(
            '[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}', regex=True)
        dates_indices = csv_table[dates_cond].index.values

        line1_tops = []
        line1_heights = []
        for index in dates_indices:
            if csv_table.at[index + 1, 'text'] == 'Physician' and csv_table.at[
                    index + 2, 'text'] == 'Referral':
                line1_tops.append(csv_table.at[index, 'top'])
                line1_heights.append(csv_table.at[index, 'height'])

        cond_line2 = csv_table.text.str.contains('https://.*', regex=True)

        line2_tops = csv_table[cond_line2].top.values
        line2_heights = csv_table[cond_line2].height.values

        # now that we have the tops of the bounding boxes for each line,
        # let's search through all other entries in the data frame that
        # have tops in a small interval around the tops of the lines of interest
        print('********')
        print(line1_tops)
        print(line1_heights)
        print(line2_tops)
        print(line2_heights)
        for top1, top2, height1, height2 in zip(line1_tops, line2_tops,
                                                line1_heights, line2_heights):
            print('****')
            cond11 = csv_table['top'].ge(top1 - height1 * 0.7)
            cond12 = csv_table['top'].le(top1 + height1 * 0.7)
            line1_table = csv_table[cond11 & cond12]
            line1_indices = line1_table.index.values
            print(line1_indices)
            csv_table = csv_table.drop(index=line1_indices)
            csv_table = csv_table.reset_index(drop=True)

            #get the indices for the table above and drop them from the final table
            cond21 = csv_table['top'].ge(top2 - height2 * 0.7)
            cond22 = csv_table['top'].le(top2 + height2 * 0.7)
            line2_table = csv_table[cond21 & cond22]
            line2_indices = line2_table.index.values
            print(line2_indices)
            csv_table = csv_table.drop(index=line2_indices)
            csv_table = csv_table.reset_index(drop=True)

    return csv_table
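The comments inside getPdfTextData sketch a simpler regex-based cleanup; below is a minimal standalone version of that idea (an alternative sketch, not the positional matching the function actually performs):

import pandas as pd

def drop_stamp_rows(table: pd.DataFrame) -> pd.DataFrame:
    # drop any OCR word that looks like a URL stamp or a date stamp
    stamp = (table.text.str.contains(r'https?://', regex=True)
             | table.text.str.contains(r'\d{1,2}/\d{1,2}/\d{2,4}', regex=True))
    return table[~stamp].reset_index(drop=True)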
Example n. 29
def check(path=r'./imgs/300/Sample11-0.jpg', dpi=300):
    for p in range(11, 12):
        for o in range(3, 4):
            img = cv2.imread(path)  # was hardcoded, ignoring the path argument
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img1 = copy.deepcopy(img)
            #temp = sys.stdout
            #f = open('out.txt', 'w')
            #sys.stdout = f
            d = pytesseract.image_to_data(
                img,
                output_type=pytesseract.Output.DICT,
                lang='eng',
                config=
                '-c  tessedit_char_whitelist="$%@.,&():ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 "  --psm {} --oem {}'
                .format(p, o))
            n_boxes = len(d['level'])
            print(d)
            #f.close()
            #sys.stdout = temp
            print('Read image')
            try:
                width = img.shape[1]
                height = img.shape[0]
                fps = 1
                font = cv2.FONT_HERSHEY_SIMPLEX
                thickness = 2
                color = (0, 0, 255)
                fontscale = 1
                org = (width - 1000, height - 100)
                org1 = (width - 1000, height - 200)
                #fourcc = VideoWriter_fourcc(*'XVID')
                #video = VideoWriter('tess_with_sp_psm_{}_oem_{}.avi'.format(p, o), fourcc, float(fps), (width, height))

                for i in range(n_boxes):
                    #if(d['width'][i] <= 10 or d['height'][i] <= 10):
                    #    continue
                    if (d['text'][i] == ''):
                        continue
                    (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i],
                                    d['height'][i])
                    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    # crop the word with a 5 px margin; max() stops negative
                    # slice indices from wrapping around
                    img1 = img1[max(y - 5, 0):y + h + 5, max(x - 5, 0):x + w + 5]
                    print(
                        pytesseract.image_to_string(
                            img1,
                            output_type=pytesseract.Output.DICT,
                            lang='eng',
                            config=
                            '-c  tessedit_char_whitelist="$%@.,&():ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 "  --psm {} --oem {}'
                            .format(p, o)))
                    cv2.imwrite('temp/{}.jpg'.format(i), img1)
                    img1 = copy.deepcopy(img)
                    text = 'w_{}_h_{}'.format(d['width'][i], d['height'][i])
                    image = cv2.putText(img1, text, org, font, fontscale,
                                        color, thickness, cv2.LINE_AA)
                    image = cv2.putText(img1, d['text'][i], org1, font,
                                        fontscale, color, thickness,
                                        cv2.LINE_AA)
                    #video.write(img1)
                cv2.imwrite(
                    './{}_tess_with_sp_psm_{}_oem_{}.jpg'.format(dpi, p, o),
                    img)
                print('image saved')
            except Exception as e:
                print('error while drawing OCR boxes')
                print("{} {}".format(p, o))
                print(e)
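The -c tessedit_char_whitelist option above restricts recognition to the listed characters; here is a small sketch assembling the same config string from Python's string constants (illustrative, equivalent to the literal used in check):

import string

whitelist = ('$%@.,&():' + string.ascii_uppercase
             + string.ascii_lowercase + string.digits + ' ')
tess_config = '-c tessedit_char_whitelist="{}" --psm 11 --oem 3'.format(whitelist)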
Example n. 30
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        im_resized = im.resize(size, Image.LANCZOS)
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
        temp_filename = temp_file.name
        im_resized.save(temp_filename, dpi=(300, 300))

        #pytesseract
        #config = "-l eng --oem 1 --psm 6 -c preserve_interword_spaces=1x1 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        config = "-l eng --oem 1 --psm 6 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

        text = pytesseract.image_to_string(temp_filename, config=config)
        print(text)
        rowtext.append(text)

        #pytesseract score_retrieval (cleaned_img is prepared earlier in the
        #original script, outside this excerpt)
        text_score = pytesseract.image_to_data(cleaned_img,
                                               output_type='data.frame',
                                               config=config)
        text_score = text_score[text_score.conf != -1]
        lines = text_score.groupby(['block_num'])['text'].apply(list)
        conf = list(text_score.groupby(['block_num'])['conf'].mean())
        conf_score_list.append(conf)

        #CRNN_Sparse: uncomment the lines below to use the CRNN text
        #detector instead of pytesseract
        # text = textdetector(temp_filename, model_crnnspar)
        # print(text)
        # rowtext.append(text)

    csv_writer.writerow(rowtext)
    row_para.append(rowtext)
csvfile.close()
Example n. 31
from wand.image import Image as wand_Image
from PIL import Image as PIL_Image
from wand.color import Color
import os
import pytesseract

def build_images(force=False):
    # rebuild unless both page images already exist; the original checked
    # foo-0.png twice, where foo-1.png was presumably intended
    if not all([not force, os.path.isfile('./foo-0.png'), os.path.isfile('./foo-1.png')]):
        all_pages = wand_Image(filename='./example.pdf', resolution=300)
        for idx, page in enumerate(all_pages.sequence):
            with wand_Image(page) as i:  # Image was imported as wand_Image above
                i.format = 'png'
                i.background_color = Color('white')
                i.alpha_channel = 'remove'
                i.save(filename='foo-%s.png' % idx)


build_images()

boxes = pytesseract.image_to_boxes(PIL_Image.open('foo-0.png'), output_type=pytesseract.Output.DICT)
data = pytesseract.image_to_data(PIL_Image.open('foo-0.png'), output_type=pytesseract.Output.DICT)
osd = pytesseract.image_to_osd(PIL_Image.open('foo-0.png'), output_type=pytesseract.Output.DICT)
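A hedged follow-up showing one use of the OSD result; the 'rotate', 'script', and 'orientation' keys match recent pytesseract releases, but verify them against your installed version:

# rotate the page upright if tesseract detected a rotated scan;
# PIL rotates counter-clockwise, so negate tesseract's clockwise angle
angle = osd.get('rotate', 0)
if angle:
    upright = PIL_Image.open('foo-0.png').rotate(-angle, expand=True)
    upright.save('foo-0-upright.png')
print('script:', osd.get('script'), 'orientation:', osd.get('orientation'))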