def detect_rotation_most_frequent(image_fn: str) -> Optional[float]:
    """Estimate an image's skew angle from per-strip measurements.

    The image is read as grayscale and cut into strips of
    ``IMAGE_PART_SIZE`` pixels along its longer side. The skew of every
    strip is measured separately so that a locally misaligned region does
    not dominate the estimate for the document as a whole.

    Args:
        image_fn: Path of the image file to analyse.

    Returns:
        The angle shared by the largest number of strips when at least one
        angle repeats, otherwise the median of the strip angles. ``None``
        when the image cannot be read or no strip yields an angle.
    """
    proc = cv2.imread(image_fn, 0)
    if proc is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        return None

    height, width = proc.shape
    part_size: int = IMAGE_PART_SIZE
    split_rows = height >= width

    # Count strips along the axis that is actually being split, and always
    # keep at least one strip so small images are still measured.
    side = height if split_rows else width
    num_parts: int = max(1, round(side / part_size))
    bounds = [(k * part_size, (k + 1) * part_size) for k in range(num_parts)]

    if split_rows:
        images = [proc[start:stop] for start, stop in bounds]
    else:
        images = [proc[:, start:stop] for start, stop in bounds]

    angles = [deskew.determine_skew(img) for img in images]
    angles = [a for a in angles if a is not None]
    if not angles:
        return None

    # max() keeps the first of several equally frequent angles, which is
    # what a stable descending sort by count would put first.
    angle, count = max(Counter(angles).items(), key=lambda it: it[1])
    if count > 1:
        # if at least some angle repeats - return the one with the max frequency
        return angle
    # otherwise use median angle - which is usually good but not the best
    return median(angles)
def main() -> None:
    """Command-line entry point: estimate, and optionally correct, image skew.

    Reads the positional input image, estimates its skew angle and either
    prints the angle (no ``--output``) or writes the rotated image to the
    ``--output`` path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-o', '--output', default=None, help='Output file name')
    # Without an explicit type argparse hands over strings, so a value given
    # on the command line would reach determine_skew as str instead of a number.
    parser.add_argument(
        '--sigma', default=3.0, type=float, help='The use sigma')
    parser.add_argument(
        '--num-peaks', default=20, type=int, help='The used num peaks')
    parser.add_argument(
        default=None, dest='input', help='Input file name')
    options = parser.parse_args()

    image = io.imread(options.input)
    grayscale = rgb2gray(image)
    angle = determine_skew(grayscale, sigma=options.sigma, num_peaks=options.num_peaks)

    if options.output is None:
        print('Estimated angle: {}'.format(angle))
    else:
        # The rotated result is scaled by 255 before the uint8 cast.
        rotated = rotate(image, angle, resize=True) * 255
        io.imsave(options.output, rotated.astype(np.uint8))
def correctSkew(originalImg):
    """Return a deskewed copy of a BGR image.

    The input is copied, the skew angle is estimated on a grayscale
    conversion of that copy, and the copy is rotated by the angle.
    ``(0, 0, 0)`` is forwarded as the third argument of ``rotate``
    (presumably the background colour - confirm against ``rotate``).
    """
    working = originalImg.copy()
    skew_angle = deskew.determine_skew(cv2.cvtColor(working, cv2.COLOR_BGR2GRAY))
    return rotate(working, skew_angle, (0, 0, 0))
def crop(full_path, i):
    """Deskew an image, crop it to its largest contour and rotate it.

    Steps: deskew the image at ``full_path``; convert it to a
    contrast-boosted, edge-enhanced grayscale image; find the contour with
    the largest area on a dilated Canny edge map; crop to that contour's
    bounding box; rotate by ``90 * i`` degrees.

    Args:
        full_path: Path of the image to process.
        i: Multiplier for the final rotation (``90 * i`` degrees).

    Returns:
        The cropped, rotated image as loaded back by ``cv2.imread``.

    Raises:
        ValueError: If no contour is found in the enhanced image.

    Side effects:
        Writes ``output.jpg`` in the current working directory and leaves
        it there. ``cropped.jpeg`` is written and removed again.
    """
    image = cv2.imread(full_path)
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    angle = determine_skew(grayscale)
    cv2.imwrite("output.jpg", rotate(image, angle, (0, 0, 0)))

    # Enhance with PIL: grayscale -> strong contrast -> edge enhancement.
    im = Image.open("output.jpg")
    im = im.convert('L')
    im = ImageEnhance.Contrast(im).enhance(5)
    im = im.filter(ImageFilter.EDGE_ENHANCE)
    im.save("output.jpg")

    img = cv2.imread("output.jpg")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kernel = np.ones((5, 5), np.uint8)
    edges = cv2.Canny(gray, 0, 200, apertureSize=3)
    edges = cv2.dilate(edges, kernel)

    # findContours returns (image, contours, hierarchy) on OpenCV 3 and
    # (contours, hierarchy) on OpenCV 4; the contours are second-to-last
    # in both layouts.
    contours = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        raise ValueError("no contours found in {!r}".format(full_path))

    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    im = im.crop((x, y, x + w, y + h))
    im = im.rotate(90 * i)

    # Round-trip through a temporary file to hand the result back as a cv2
    # image; remove the file even when reading it back fails.
    im.save("cropped.jpeg")
    try:
        cropped = cv2.imread("cropped.jpeg")
    finally:
        os.remove("cropped.jpeg")
    return cropped
def openGreyRotate(imagePath):
    """Load an image, rotate it by its skew angle plus 90, return it in gray.

    The skew is estimated on the grayscale version of the file, the colour
    image is rotated by ``angle + 90`` with ``(0, 0, 0)`` forwarded to
    ``rotate``, and the rotated result is converted to grayscale.
    """
    colour = cv2.imread(imagePath)
    skew = determine_skew(cv2.cvtColor(colour, cv2.COLOR_BGR2GRAY))
    turned = rotate(colour, skew + 90, (0, 0, 0))
    return cv2.cvtColor(turned, cv2.COLOR_BGR2GRAY)
def get_angle_text(img, method="hough", limit=5.0, step=1.0):
    """Estimate the rotation angle of the text in ``img``.

    Args:
        img: Image to analyse; handed to ``determine_skew`` or ``find_score``.
        method: ``"hough"`` uses ``determine_skew``; ``"projection"`` scores
            every candidate angle with ``find_score`` and keeps the best.
        limit: Largest absolute angle accepted. For ``"hough"`` a larger
            angle is ignored (0.0 is returned); for ``"projection"`` it
            bounds the candidate range ``[-limit, limit]``.
        step: Spacing between candidate angles for ``"projection"``.

    Returns:
        The estimated angle.

    Raises:
        NameError: If ``method`` is neither ``"hough"`` nor ``"projection"``.
    """
    if method == "hough":
        angle = determine_skew(img)
        if abs(angle) >= 90:
            # Vertical lines detected
            angle = angle % 90
            angle -= 90
        # Compare against the caller's limit (previously a hard-coded 5,
        # although the warning below already reported ``limit``).
        if abs(angle) > limit:
            print(
                "\t- [WARNING] Ignoring rotation. Maximum angle exceeded ({:.2f}º > +-{:.2f}º)"
                .format(angle, limit))
            angle = 0.0
        return angle

    if method == "projection":
        # Find rotation angle
        angles = np.arange(-limit, limit + step, step)
        scores = []
        for candidate in angles:
            _, score = find_score(img, candidate)
            scores.append(score)
        # The first candidate reaching the best score wins.
        return angles[scores.index(max(scores))]

    raise NameError("Unknown method")
def main() -> None:
    """Command-line entry point: estimate, and optionally correct, image skew.

    Without ``--output`` the estimated angle is printed. With ``--output``
    the rotated image is written there; ``--background`` (comma-separated
    integers, e.g. ``r,g,b``) sets the colour of the areas uncovered by the
    rotation. An unparsable background prints a message and exits with
    status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--output", default=None, help="Output file name")
    # Without an explicit type argparse hands over strings, so a value given
    # on the command line would reach determine_skew as str instead of a number.
    parser.add_argument("--sigma", default=3.0, type=float, help="The use sigma")
    parser.add_argument("--num-peaks", default=20, type=int, help="The used num peaks")
    parser.add_argument("--background", help="The used background color")
    parser.add_argument(default=None, dest="input", help="Input file name")
    options = parser.parse_args()

    image = io.imread(options.input)
    grayscale = rgb2gray(image)
    angle = determine_skew(grayscale, sigma=options.sigma, num_peaks=options.num_peaks)

    if options.output is None:
        print(f"Estimated angle: {angle}")
        return

    if options.background:
        try:
            background = [int(c) for c in options.background.split(",")]
        except ValueError:
            # int() failed on one of the components.
            print("Wrong background color, should be r,g,b")
            sys.exit(1)
        # cval=-1 marks the pixels filled in by the rotation; after scaling
        # they equal -255 and are replaced by the requested colour.
        rotated = rotate(image, angle, resize=True, cval=-1) * 255
        pos = np.where(rotated == -255)
        rotated[pos[0], pos[1], :] = background
    else:
        rotated = rotate(image, angle, resize=True) * 255
    io.imsave(options.output, rotated.astype(np.uint8))
def rotate_image(image, resize=True):
    """Rotate ``image`` by its estimated skew angle.

    Images with more than two dimensions are converted from BGR to
    grayscale for the estimation; the rotation is applied to the image as
    given.

    Returns:
        ``(angle, rotated)``: the estimated angle and the rotated image.
    """
    has_channels = len(image.shape) > 2
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if has_channels else image
    skew = determine_skew(gray)
    return skew, rotate(image, skew, resize=resize)
def test_deskew(image, expected_angle):
    """Check the skew detected on ``deskew-<image>.png`` next to this file.

    Also makes sure the ``results/<image>`` directory exists.
    """
    results_dir = f"results/{image}"
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    here = os.path.dirname(__file__)
    pixels = io.imread(os.path.join(here, f"deskew-{image}.png"))
    angle = determine_skew(pixels)
    assert angle == expected_angle
def fix_orientation(image: np.ndarray) -> list:
    """Return two deskewed candidates for a BGR image.

    The skew angle cannot tell an upright page from an upside-down one, so
    both possibilities are produced and the caller chooses.

    Args:
        image: BGR image to deskew.

    Returns:
        A two-element list ``[rotated, alt_rotated]``: the image rotated by
        the estimated skew angle and by that angle plus 180 degrees.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    angle = determine_skew(grayscale)
    rotated = rotate(image, angle, (0, 0, 0))
    alt_rotated = rotate(image, angle + 180, (0, 0, 0))
    return [rotated, alt_rotated]
def deskewImage(image):
    """Deskew a BGR image in grayscale and write it to a fixed path.

    The skew is estimated on the grayscale conversion. Angles below -45 are
    shifted by +90 before rotating. The rotated grayscale image is written
    to ``../results/deskew_image/image.png``; nothing is returned.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    skew = determine_skew(gray)
    if skew < -45:
        skew = skew + 90
    output_path = '../results/deskew_image/image.png'
    cv2.imwrite(output_path, rotate(gray, skew, (0, 0, 0)))
    return
def test_deskew_higher_pressision(image, expected_angle):
    """Check the skew detected with ``num_angles=1800`` on a fixture image.

    Makes sure ``results/<image>`` exists, loads ``deskew-<image>.png`` from
    this file's directory and prints the deviation from
    ``expected_angle.expected`` before asserting equality.
    """
    results_dir = f"results/{image}"
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    here = os.path.dirname(__file__)
    pixels = io.imread(os.path.join(here, f"deskew-{image}.png"))
    angle = determine_skew(pixels, num_angles=1800)
    deviation = angle - expected_angle.expected
    print(deviation)
    assert angle == expected_angle
def test_deskew(image, expected_angle):
    """Check the skew detected on the grayscale version of a fixture image.

    Makes sure ``results/<image>`` exists, loads ``deskew-<image>.png`` from
    this file's directory, converts it with ``rgb2gray`` and prints the
    deviation from ``expected_angle.expected`` before asserting equality.
    """
    results_dir = 'results/{}'.format(image)
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    fixture_name = 'deskew-{}.png'.format(image)
    pixels = io.imread(os.path.join(os.path.dirname(__file__), fixture_name))
    angle = determine_skew(rgb2gray(pixels))
    deviation = angle - expected_angle.expected
    print(deviation)
    assert angle == expected_angle
def auto_deskew(self):
    """Replace ``self.img`` with a deskewed version of itself.

    The skew angle is estimated on a BGR-to-grayscale conversion of
    ``self.img``; the image is then rotated by that angle through
    ``rotate_img`` using ``self.background``.

    :return: None
    """
    gray = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)
    skew = determine_skew(gray)
    self.img = rotate_img(self.img, skew, self.background)
def deskewing(image):
    """Rotate a BGR image according to the skew of its inverted grayscale.

    Also points ``pytesseract`` at the Tesseract executable under
    ``C:\\Tesseract-OCR``. The estimated angle is increased by 90 unless it
    is exactly 0 or not greater than -90; the final angle is printed and
    the image is rotated by ``xoay`` with ``(255, 255, 255)`` as its third
    argument.
    """
    pytesseract.pytesseract.tesseract_cmd = r'C:\Tesseract-OCR\tesseract.exe'
    inverted = cv2.bitwise_not(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    angle = determine_skew(inverted)
    if angle != 0 and angle > -90:
        angle = angle + 90
    print(angle)
    return xoay(image, angle, (255, 255, 255))
def idcard_cropped_autodeskew(image):
    """Deskew ``image`` using the skew of its edge map.

    The edge map comes from ``super_edges`` applied to a copy of the image
    and is also drawn on a new 12x12 matplotlib figure. The original image
    is rotated by the detected angle (``resize=True``, ``cval=1``) and
    scaled to ``uint8``.

    Returns:
        ``(rotated, angle)``.
    """
    edge_map = super_edges(image.copy())

    plt.figure(figsize=(12,12))
    plt.imshow(edge_map, cmap='gray')

    angle = determine_skew(edge_map)
    deskewed = rotate(image, angle, resize=True, cval=1)
    deskewed = (deskewed*255).astype(np.uint8)
    print('angle', angle)
    return deskewed, angle
def generated_text():
    """Deskew a fixed licence image, preprocess it and return its OCR text.

    Reads ``text_detected-objects\\licence-00000.jpg``, writes the deskewed
    image to ``deskewed_fin.jpg`` and the output of
    ``process_image_for_ocr`` to ``processed_fin.jpg``, then runs Tesseract
    restricted to ``.``, upper-case letters and digits.

    Returns:
        The recognised text.
    """
    image = io.imread('text_detected-objects\\licence-00000.jpg')
    grayscale = rgb2gray(image)
    angle = determine_skew(grayscale)
    rotated = rotate(image, angle, resize=True) * 255
    io.imsave('deskewed_fin.jpg', rotated.astype(np.uint8))

    img = process_image_for_ocr('deskewed_fin.jpg')
    cv2.imwrite('processed_fin.jpg', img)

    # Open the image in a context manager so its file handle is released
    # as soon as OCR has finished.
    with Image.open('processed_fin.jpg') as processed:
        text = pytesseract.image_to_string(
            processed,
            config='-c tessedit_char_whitelist=.ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
    return text
def rotate(image: np.ndarray, background: Union[int, Tuple[int, int, int]]) -> np.ndarray:
    """Rotate ``image`` by its own estimated skew angle without cropping.

    The output canvas is enlarged to the bounding box of the rotated image
    and the rotation matrix is shifted so the content stays centred.

    Args:
        image: Image to rotate; its skew is estimated with ``determine_skew``.
        background: Passed to ``cv2.warpAffine`` as ``borderValue``.

    Returns:
        The rotated image.
    """
    angle = determine_skew(image)
    rows, cols = image.shape[:2]

    theta = math.radians(angle)
    sin_t = np.sin(theta)
    cos_t = np.cos(theta)
    # Size of the bounding box of the rotated image.
    new_rows = abs(sin_t * cols) + abs(cos_t * rows)
    new_cols = abs(sin_t * rows) + abs(cos_t * cols)

    # shape[1::-1] yields (cols, rows), i.e. the (x, y) order used by OpenCV.
    centre = tuple(np.array(image.shape[1::-1]) / 2)
    matrix = cv2.getRotationMatrix2D(centre, angle, 1.0)
    # Shift the result so it is centred on the enlarged canvas.
    matrix[1, 2] += (new_rows - rows) / 2
    matrix[0, 2] += (new_cols - cols) / 2

    canvas_size = (int(round(new_cols)), int(round(new_rows)))
    return cv2.warpAffine(image, matrix, canvas_size, borderValue=background)
def _auto_deskew_word(self, impath, resize=False):
    """Deskew the image at ``impath`` using its word-detection score maps.

    ``self._detect_word_boxes`` supplies the image and two score maps; the
    skew is estimated on their sum. The image is rotated (``resize=True``,
    ``cval=1``) and scaled to ``uint8``. With ``resize`` set, the longer
    side is resized to 1000 via ``imutils.resize`` (the height when both
    sides are equal).

    Returns:
        ``(rotated_img, angle)``.
    """
    detection = self._detect_word_boxes(impath)
    _polys, _boxes, _patches, img, score_text, score_link, _ret_score = detection

    angle = determine_skew(score_text + score_link)
    deskewed = (rotate(img, angle, resize=True, cval=1) * 255).astype(np.uint8)
    if not resize:
        return deskewed, angle

    rows, cols = deskewed.shape[:2]
    if cols > rows:
        deskewed = imutils.resize(deskewed, width=1000)
    else:
        deskewed = imutils.resize(deskewed, height=1000)
    return deskewed, angle
def enhance_image(path):
    """Increase the contrast of an image, level its text and save the result.

    The image is converted to grayscale, passed through a sigmoid contrast
    adjustment, rotated by its skew angle and written to ``output.png``.

    Args:
        path: Location of the image to read.

    Returns:
        ``False`` when the image cannot be read, otherwise ``None``.
    """
    # Try to read the image, alert if it's not readable. Only ordinary
    # errors are handled so Ctrl-C and SystemExit still propagate.
    try:
        image = io.imread(path)
    except Exception:
        print("could not read target image")
        return False

    # make grayscale
    grayscale = rgb2gray(image)
    # increase contrast
    contrasted = adjust_sigmoid(grayscale, cutoff=0.5, gain=10, inv=False)

    # render text horizontal
    angle = determine_skew(contrasted)
    if abs(angle) > 75:
        # Angles beyond 75 are reduced by 90 while keeping their sign.
        angle2 = (abs(angle) - 90) * (angle / abs(angle))
    else:
        angle2 = angle
    rotated = rotate(contrasted, angle2, resize=True) * 255

    # save as 'output.png'
    io.imsave("output.png", rotated.astype(np.uint8))
def deskew_image():
    """Deskew ``PATH/FILENAME`` and write it to ``OUTPUT_PATH``.

    The output file is named ``deskewed_`` followed by ``FILENAME``;
    ``(0, 0, 0)`` is forwarded as the third argument of ``cv2_rotate``.
    """
    source = cv2.imread(os.path.join(PATH, FILENAME))
    skew = determine_skew(cv2.cvtColor(source, cv2.COLOR_BGR2GRAY))
    deskewed = cv2_rotate(source, skew, (0, 0, 0))
    target = os.path.join(OUTPUT_PATH, 'deskewed_' + FILENAME)
    cv2.imwrite(target, deskewed)
def deskew(self, img):
    """Rotate ``img`` by its estimated skew angle.

    The angle, rounded to two decimals, is printed.

    Returns:
        ``(angle, rotated)`` where ``rotated`` is the rotated image scaled
        by 255 and cast to ``uint8``.
    """
    angle = determine_skew(rgb2gray(img))
    scaled = rotate(img, angle, resize=True) * 255
    print(f'Deskew angle : {np.round(angle,2)}')
    return angle, scaled.astype(np.uint8)
def deskew_image(vinput, voutput):
    """Deskew the image file ``vinput`` and write the result to ``voutput``.

    ``(255, 255, 255)`` is forwarded as the third argument of ``rotate``.
    """
    source = cv2.imread(vinput)
    skew = determine_skew(cv2.cvtColor(source, cv2.COLOR_BGR2GRAY))
    cv2.imwrite(voutput, rotate(source, skew, (255, 255, 255)))
def detect_rotation_using_skewlib(image_fn: str) -> Optional[float]:
    """Estimate the skew angle of the whole image with ``deskew``.

    Args:
        image_fn: Path of the image file; it is read as grayscale.

    Returns:
        The value returned by ``deskew.determine_skew``, or ``None`` when
        the image cannot be read.
    """
    proc = cv2.imread(image_fn, 0)
    if proc is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        return None
    return deskew.determine_skew(proc)
# Batch-deskew script: walks the "check" directory, deskews every image and
# hands PDFs to imInPdf before processing the PNG files found in "impdf".
# NOTE(review): this fragment looks truncated - the counter `i` is incremented
# but not initialised here, and the last `rotated` is never saved in the
# visible code.
pix = None  # not used in the visible part of the script
path = '/home/bhanu/net-centric/resources/skew/check/'
path1 = '/home/bhanu/net-centric/resources/skew/impdf/'
path2 = '/home/bhanu/net-centric/resources/skew/pdf2img/'  # not used in the visible part
for filename in os.listdir(path):
    if filename.endswith(".png") or filename.endswith(".jpg") or filename.endswith(".jpeg"):
        # Plain image: deskew it and save it under a running number.
        i += 1
        print("Detecting and deskewing the given images")
        print(os.path.join(path, filename))
        x = os.path.join(path, filename)
        image = io.imread(x)
        grayscale = rgb2gray(image)
        angle = determine_skew(grayscale)
        # The rotated image is scaled by 255 before the uint8 cast below.
        rotated = rotate(image, angle, resize=True) * 255
        print("Saving the deskewed images")
        io.imsave("/home/bhanu/net-centric/resources/skew/deskew/" + 'deskew' + str(i) + '.png', rotated.astype(np.uint8))
    elif filename.endswith('.pdf'):
        # presumably imInPdf extracts the PDF's images into `path1` - verify
        imInPdf(path + filename)
        # NOTE(review): nesting of this loop under the PDF branch is assumed
        # from the data flow - confirm against the original layout.
        for files in os.listdir(path1):
            if files.endswith(".png"):
                print("Detecting and deskewing images inside pdf")
                print(os.path.join(path1, files))
                x = os.path.join(path1, files)
                image = io.imread(x)
                grayscale = rgb2gray(image)
                angle = determine_skew(grayscale)
                rotated = rotate(image, angle, resize=True) * 255
def htr(filepath):
    """Recognise the text of the image at ``filepath``.

    Pipeline: deskew the grayscale image, detect MSER regions, keep the
    plausibly shaped ones, merge them into word boxes by dilation, group
    the boxes into lines and run ``predict`` on every word crop.

    Returns:
        The recognised text, one line of words per detected text line.
    """
    image = cv2.imread(filepath, 0)  # 0: read as grayscale
    rows, cols = image.shape
    # Estimate the skew on an eroded copy, then rotate the original image.
    kernel = np.ones((9, 9), np.uint8)
    erode = cv2.erode(image, kernel, iterations=1)
    angle = determine_skew(erode)
    img = rotate(image, angle, resize=True) * 255
    img = np.uint8(img)
    print('\ngot image')
    # mser properties
    _delta = 5
    _min_area = 60
    _max_area = 14400
    _max_variation = 0.25
    _min_diversity = .2
    _max_evolution = 200
    _area_threshold = 1.01
    _min_margin = 0.003
    _edge_blur_size = 5
    mser = cv2.MSER_create(_delta, _min_area, _max_area, _max_variation, _min_diversity, _max_evolution, _area_threshold, _min_margin, _edge_blur_size)
    regions, boundingBoxes = mser.detectRegions(img)
    # Mask that collects the pixels of every accepted region.
    out_image_2 = np.zeros(img.shape, dtype='uint8')
    regions2 = []
    area_regions = []
    for region in regions:
        region = np.asarray(region)
        # Bounding box of the region; column 0 is used as x and column 1 as y
        # (see the mask assignment below).
        min1 = np.amin(region[:, 0])
        max1 = np.amax(region[:, 0])
        min2 = np.amin(region[:, 1])
        max2 = np.amax(region[:, 1])
        if max1 != min1 and max2 != min2:
            # e: height/width ratio of the box; ac: share of the box area
            # covered by region pixels.
            e = float(max2 - min2) / float(max1 - min1)
            ac = float(len(region)) / ((max2 - min2) * (max1 - min1))
            if e > 0.1 and e < 10 and ac > 0.2:
                regions2.append(region)
                area_regions.append((max2 - min2) * (max1 - min1))
                out_image_2[region[:, 1], region[:, 0]] = 255
    area_regions = np.asarray(area_regions)
    regions = regions2
    # Average the areas that fall strictly inside the most populated
    # histogram bin.
    n, bins = np.histogram(area_regions, bins="auto")
    avg = 0
    num = 0
    a, b = bins[np.argmax(n)], bins[np.argmax(n) + 1]
    for i in range(len(area_regions)):
        if area_regions[i] > a and area_regions[i] < b:
            avg += area_regions[i]
            num += 1
    # NOTE(review): divides by zero when no area lies strictly inside the bin.
    avg = avg / float(num)
    # Dilate horizontally, then vertically, with kernels derived from the
    # average area so neighbouring regions merge.
    kernell = np.ones((1, int(0.7 * np.sqrt(avg))), np.uint8)
    appx_size = int(0.7 * np.sqrt(avg))
    out_image_3 = cv2.dilate(out_image_2, kernell, iterations=1)
    kernel2 = np.ones((int(0.2 * np.sqrt(avg)), 1), np.uint8)
    out_image_4 = cv2.dilate(out_image_3, kernel2, iterations=1)
    # NOTE(review): two-value unpacking assumes the OpenCV 4 return layout.
    cnts, _ = cv2.findContours(
        out_image_4.astype(np.uint8).copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    regions1 = []
    for i in range(len(cnts)):
        x, y, w, h = cv2.boundingRect(cnts[i])
        include = True
        # Drop boxes lying completely inside (or equal to) another contour's box.
        for j in range(len(cnts)):
            if j != i:
                x1, y1, w1, h1 = cv2.boundingRect(cnts[j])
                if x >= x1 and y >= y1 and x + w <= x1 + w1 and y + h <= y1 + h1:
                    include = False
        if (h > 2 * appx_size or w > 2 * appx_size or w * h > 100) and include:
            regions1.append([x, y, w, h])
    regions1 = np.array(regions1)
    area = regions1[:, 2] * regions1[:, 3]
    area = np.array(sorted(area)) / (rows * cols)  # not used afterwards
    # Group consecutive boxes into lines: a box joins the current line when
    # it overlaps the previous box vertically.
    regions2 = [[] for i in range(len(regions1))]
    regions2[0].append(regions1[0])
    line_idx = 0
    for i in range(1, len(regions1)):
        x, y, w, h = regions1[i]
        xa, ya, wa, ha = regions1[i - 1]
        a = max(y, ya)
        b = min(h + y, ha + ya)
        if (b - a) > 0:
            regions2[line_idx].append(regions1[i])
        else:
            line_idx = line_idx + 1
            regions2[line_idx].append(regions1[i])
    # NOTE(review): the lines have different lengths, so this builds a ragged
    # array; recent NumPy releases may reject that - confirm.
    regions2 = np.array(regions2)
    regions2 = [x for x in regions2 if x != []]
    # Walk the lines in reverse order and sort each line's boxes by x.
    regions3 = []
    for i in range(len(regions2) - 1, -1, -1):
        array = np.array(regions2[i])
        g = np.argsort(array[:, 0])
        lin = array[g, :]
        regions3.append(lin)
    content = u''
    for line in regions3:
        LineString = ''
        for i in range(len(line[:, 0])):
            x, y, w, h = line[i, :]
            # `w` is reused: from here on it holds the cropped word image.
            w = img[y:y + h, x:x + w]
            Word = predict(w, model2)
            LineString += Word + ' '
        LineString += '\n'
        content += LineString
    return content
def deskew(self,_img,principal_angle=11.25):
    """Rotate ``_img`` by ``principal_angle`` plus its estimated skew.

    The negated total angle is printed.

    Returns:
        ``(-principal_angle, rotated)``: the negated ``principal_angle``
        (not the total applied angle) and the rotated image scaled by 255
        and cast to ``uint8``.
    """
    total_angle = principal_angle + determine_skew(rgb2gray(_img))
    scaled = rotate(_img, total_angle, resize=True) * 255
    print(-total_angle)
    return -principal_angle, scaled.astype(np.uint8)
def imageSplit(imageDir, imageName, tileSize):
    """Deskew an image, crop it to its first contour and cut it into tiles.

    Intermediate files are written to ``codeCacheDir`` and the tiles to
    ``tileCacheDir``; progress and timings are printed.

    Args:
        imageDir: Directory containing the image.
        imageName: File name of the image inside ``imageDir``.
        tileSize: Divisor used to derive how many tiles fit per axis.

    Returns:
        A list of ``[imageName, tile file name, tile file path]`` entries,
        one per tile written.
    """
    startTime = datetime.now()
    print("\t\tBeginning split for image {} at {}".format(imageName, startTime.strftime("%H:%M:%S")))
    imagePath = os.path.join(imageDir, imageName)
    # List collecting one record per generated tile
    tileInfo = []
    # Convert image to PNG for processing
    baseImage = Image.open(imagePath)
    baseImage.save(os.path.join(codeCacheDir, "baseImage_PNG.png"), "PNG")
    baseImage = io.imread(os.path.join(codeCacheDir, "baseImage_PNG.png"))
    conversionTime = datetime.now() - startTime
    print("\t\t\t Conversion completed in {} seconds".format(conversionTime.seconds))
    # Rotate and save image
    grayscale = rgb2gray(baseImage)
    angle = determine_skew(grayscale)
    rotated = rotate(baseImage, angle, resize=True)*255
    io.imsave(os.path.join(codeCacheDir,"baseImgae_PNG_rotated.png"), rotated.astype(np.uint8))
    # NOTE(review): this is the time elapsed since the start plus the
    # conversion time, not the duration of the rotation step - confirm intent.
    rotateTime = datetime.now() - startTime + conversionTime
    print("\t\t\t Rotation completed in {} seconds".format(rotateTime.seconds))
    # Find and crop grey edges
    img = cv2.imread(os.path.join(codeCacheDir,"baseImgae_PNG_rotated.png"))
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Threshold at 0: every non-zero pixel becomes 255.
    _, threshold = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # NOTE(review): uses the first contour returned, whatever its size, and
    # fails when no contour is found.
    count = contours[0]
    x, y, w, h = cv2.boundingRect(count)
    cropped = img[y:y + h, x:x + w]
    cv2.imwrite(os.path.join(codeCacheDir, "grayscale_check.png"), grayscale)
    cv2.imwrite(os.path.join(codeCacheDir,"baseImage_rotate_borderless.png"), cropped)
    borderRemovalTime = datetime.now() - startTime + rotateTime
    print("\t\t\t Border removal completed in {} seconds".format(borderRemovalTime.seconds))
    # Determine chunk size and chunk images
    ready_image = Image.open(os.path.join(codeCacheDir,"baseImage_rotate_borderless.png"))
    width, height = ready_image.size
    chunks_wide = width // tileSize
    chunks_high = height // tileSize
    chunkCoords = []
    chunkCount = (chunks_high + 1) * (chunks_wide + 1)  # not used afterwards
    # NOTE(review): tiles are always 234 pixels wide/high, independent of
    # tileSize - confirm whether chunkDim should follow tileSize.
    chunkDim = 234
    Xs = []
    Ys = []
    increasingX = 0
    increasingY = 0
    # Tile origins along x: chunks_wide regular steps plus one tile aligned
    # to the right edge.
    while len(Xs) < chunks_wide:
        Xs.append(increasingX)
        increasingX += chunkDim
    Xs.append(width - chunkDim)
    # Same along y, with a final tile aligned to the bottom edge.
    while len(Ys) < chunks_high:
        Ys.append(increasingY)
        increasingY += chunkDim
    Ys.append(height - chunkDim)
    for x in Xs:
        x1 = x
        x2 = x + chunkDim
        for y in Ys:
            y1 = y
            y2 = y + chunkDim
            chunkCoords.append([x1, y1, x2, y2])
    # Generate Tiles
    for tile in chunkCoords:
        cropped = ready_image.crop((tile[0], tile[1], tile[2], tile[3]))
        filename = "{}-{}-{}-{}-{}.png".format(imageName, tile[0], tile[1], tile[2], tile[3])
        filepath = os.path.join(tileCacheDir, filename)
        tileInfo.append([imageName,filename, filepath])
        cropped.save(filepath, "PNG")
    tileGenerationTime = datetime.now() - startTime + borderRemovalTime
    print("\t\t\t Tile generation completed in {} seconds".format(tileGenerationTime.seconds))
    # Keep a copy of the processed (deskewed, cropped) image.
    ready_image.save(os.path.join(codeCacheDir, "processedImages/{}_prc".format(imageName)), "PNG")
    totalTime = datetime.now() - startTime
    print("\t\tFile: {} completed in {} seconds\r\r".format(imageName, totalTime.seconds))
    return tileInfo