def get_grid_img(frame):
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    image = cv2.GaussianBlur(image, (11, 11), 0)
    # the adaptive method must be a single flag; OR-ing MEAN_C with
    # GAUSSIAN_C is not meaningful
    image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY_INV, 11, 4)
    # cv2.imshow("original", image)
    # display_image(image)
    contours = get_sorted_contours(image)
    contour, corners = get_grid_contour(contours)
    # bail out before using the contour, not after
    if contour is None:
        return None, None
    x, y, width, height = cv2.boundingRect(contour)
    average_grid_wh = int((int(width) + int(height)) / 2)
    # show outline of selected grid on full image
    # grid_outline = draw_grid_outline(frame.copy(), corners)
    # cv2.imshow("grid_outline", grid_outline)
    warped_grid, transform_matrix_inv = warp(image, corners, average_grid_wh)
    return warped_grid, transform_matrix_inv
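# `warp`, `get_sorted_contours`, `get_grid_contour`, and `draw_grid_outline`
# are helpers defined elsewhere. Below is a minimal sketch of what `warp`
# likely does, assuming `corners` is ordered top-left, top-right,
# bottom-right, bottom-left; the ordering and return contract are
# assumptions, not the original implementation.
import cv2
import numpy as np

def warp(image, corners, side):
    src = np.array(corners, dtype="float32")
    dst = np.array([[0, 0], [side - 1, 0],
                    [side - 1, side - 1], [0, side - 1]], dtype="float32")
    # perspective transform mapping the grid corners onto a square
    matrix = cv2.getPerspectiveTransform(src, dst)
    warped = cv2.warpPerspective(image, matrix, (side, side))
    # returning the inverse lets callers map points back onto the frame
    return warped, np.linalg.inv(matrix)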
import cv2 as cv
import numpy as np

def large_image_demo(image):
    print(image.shape)
    cw, ch = 256, 256
    h, w = image.shape[:2]
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    # threshold the image tile by tile
    for row in range(0, h, ch):
        for col in range(0, w, cw):
            roi = gray[row:row + ch, col:col + cw]
            dst = cv.adaptiveThreshold(roi, 255, cv.ADAPTIVE_THRESH_MEAN_C,
                                       cv.THRESH_BINARY, 25, 20)
            gray[row:row + ch, col:col + cw] = dst
            print(np.std(dst), np.mean(dst))
    """
    Global (Otsu) binarization combined with a variance filter gives an
    effect similar to adaptive thresholding:
    dev = np.std(roi)
    if dev < 15:
        gray[row:row + ch, col:col + cw] = 255
    else:
        ret, dst = cv.threshold(roi, 0, 255, cv.THRESH_BINARY | cv.THRESH_OTSU)
        gray[row:row + ch, col:col + cw] = dst
    """
    cv.imshow("result", gray)
def ImageToBWEdgeDetect(img):
    # Load image:
    if isinstance(img, str):
        img = Image.open(img)
    elif not isinstance(img, Image.Image):
        print("Invalid image format")
        return None
    # Add a white background to transparent images:
    working_size = GetImageResize(img, 512, max_ratio=1.5)
    img = img.resize(working_size)
    img = img.convert("RGBA")
    white_bg = Image.new("RGB", img.size, "WHITE")
    white_bg.paste(img, (0, 0), img)
    img = white_bg.convert("RGB")
    # Use OpenCV to find edges:
    # img = cv2.imread(img, 0)
    img = cv2.cvtColor(numpy.array(img), cv2.COLOR_RGB2GRAY)
    img = cv2.resize(img, working_size, interpolation=cv2.INTER_LINEAR)
    img = cv2.medianBlur(img, 5)
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 11, 2)
    # the thresholded image is single-channel, so convert GRAY -> RGB
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    img = Image.fromarray(img)
    # img = img.convert("RGBA")
    # white_bg = Image.new("RGB", img.size, "WHITE")
    # white_bg.paste(img, (0, 0), img)
    # img = white_bg.convert("RGB")
    # img = img.quantize(colors=2, method=Image.MAXCOVERAGE)
    # img = img.convert(mode="1", dither=Image.NONE)
    # return img.quantize(colors=2)
    return img

# img = ImageToBWEdgeDetect("RobloxImages/5918162200.jpg")
# img.save("grayscale_test.png")
def cartoonizer(img, num_down=2, num_bi=5):
    # num_down: downsample steps, num_bi: bilateral filtering steps
    img_c = img
    for ix in range(num_down):
        img_c = cv2.pyrDown(img_c)  # pyramid down: downsampling
    for iy in range(num_bi):
        img_c = cv2.bilateralFilter(img_c, d=9, sigmaColor=9, sigmaSpace=7)
    # upsampling
    for ix in range(num_down):
        img_c = cv2.pyrUp(img_c)  # pyramid up: upsampling
    # blur and threshold
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)  # grayscale
    img_blur = cv2.medianBlur(img_gray, 7)  # median blur
    img_edge = cv2.adaptiveThreshold(img_blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                     cv2.THRESH_BINARY, blockSize=9, C=2)
    # resize back to the original shape so the bitwise AND lines up
    img_c = cv2.resize(img_c, (img.shape[1], img.shape[0]))
    # RGB conversion + bitwise AND
    img_edge = cv2.cvtColor(img_edge, cv2.COLOR_GRAY2RGB)
    img_cartoon = cv2.bitwise_and(img_c, img_edge)
    stack = np.hstack([img, img_cartoon])
    return stack
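# Hedged usage sketch for cartoonizer(); "input.jpg" is a hypothetical file.
# Any BGR image works, though dimensions divisible by 4 (for num_down=2)
# keep the pyrDown/pyrUp round trip exact.
import cv2

img = cv2.imread("input.jpg")
side_by_side = cartoonizer(img)
cv2.imshow("original | cartoon", side_by_side)
cv2.waitKey(0)
cv2.destroyAllWindows()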
def get_text(PDF_file):
    # Store all the pages of the PDF in a variable
    pages = convert_from_path(PDF_file, 500)
    # Counter used to name the image of each PDF page
    image_counter = 1
    t = ""
    for page in pages:
        # Each page is saved as page_1.jpg, page_2.jpg, ..., page_n.jpg
        filename = "page_" + str(image_counter) + ".jpg"
        page.save(filename, 'JPEG')
        img = cv2.imread(filename)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Apply dilation and erosion to remove some noise
        kernel = np.ones((1, 1), np.uint8)
        img = cv2.dilate(img, kernel, iterations=1)
        img = cv2.erode(img, kernel, iterations=1)
        img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY, 31, 2)
        text = str(pytesseract.image_to_string(img, config=custom_oem_psm_config))
        t += text + "\n"
        os.remove(filename)
        image_counter += 1
    return t
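# Hedged usage sketch for get_text(): pdf2image needs the poppler utilities
# installed and pytesseract needs the Tesseract binary on PATH. "sample.pdf"
# is a placeholder, and custom_oem_psm_config is defined by the caller in
# the original; the value below is only an illustrative assumption.
custom_oem_psm_config = r"--oem 3 --psm 6"
print(get_text("sample.pdf"))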
def try_wrap():
    img = cv2.imread("datas/images/namecard_01.jpg")
    pts = np.float32([[400, 464], [934, 544], [96, 1174], [960, 1328]])
    img_wrap = wrapping(img, pts)
    img_gray, _, _ = preprocessing(img_wrap)
    img_gray2 = cv2.resize(img_gray, None, fx=0.5, fy=0.5,
                           interpolation=cv2.INTER_CUBIC)
    img_th = cv2.adaptiveThreshold(img_gray2, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, -10)
    # ret, img_th = cv2.threshold(img_sharp, 127, 255, cv2.THRESH_BINARY)
    img_ocr, words = test_pytesseract2.get_OCR2(255 - img_th)
    img_gray = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2BGR)
    # img_blur = cv2.cvtColor(img_blur, cv2.COLOR_GRAY2BGR)
    # img_sharp = cv2.cvtColor(img_sharp, cv2.COLOR_GRAY2BGR)
    img_th = cv2.cvtColor(img_th, cv2.COLOR_GRAY2BGR)
    final1 = np.hstack((img_wrap, img_gray))
    final2 = np.hstack((img_th, img_ocr))
    final1 = cv2.resize(final1, None, fx=0.5, fy=0.5,
                        interpolation=cv2.INTER_CUBIC)
    final = np.vstack((final1, final2))
    cv2.imshow('img1', final1)
    cv2.imshow('img2', final2)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def try_wrap2():
    img = cv2.imread("datas/images/licenseplate_04.jpg")
    img = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC)
    pts = np.float32([[200, 232], [467, 272], [48, 587], [480, 664]])
    img_wrap = wrapping(img, pts)
    img_gray, img_blur, img_sharp = preprocessing(img_wrap)
    img_th = cv2.adaptiveThreshold(img_sharp, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 4)
    img_ocr, words = test_pytesseract2.get_OCR2(img_th)
    img_gray = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2BGR)
    img_blur = cv2.cvtColor(img_blur, cv2.COLOR_GRAY2BGR)
    img_sharp = cv2.cvtColor(img_sharp, cv2.COLOR_GRAY2BGR)
    img_th = cv2.cvtColor(img_th, cv2.COLOR_GRAY2BGR)
    final1 = np.hstack((img_wrap, img_gray, img_blur))
    final2 = np.hstack((img_sharp, img_th, img_ocr))
    final = np.vstack((final1, final2))
    cv2.imshow('img1', img)
    # cv2.imshow('img2', final2)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def get_binary_thresholded_image(cv2_image):
    img = convert_to_gray_image(cv2_image)
    img = cv2.medianBlur(img, 5)
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 11, 2)
    return img
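# Hedged usage sketch; assumes convert_to_gray_image() wraps
# cv2.cvtColor(..., cv2.COLOR_BGR2GRAY) and "page.png" is a placeholder.
import cv2

page = cv2.imread("page.png")
binary = get_binary_thresholded_image(page)
cv2.imwrite("page_binary.png", binary)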
def predict_folder(self, pather):
    dirs = os.listdir(pather)
    corrects = []
    wrongs = []
    every = []
    for folders in dirs:
        fold = os.fsdecode(folders)
        if fold.endswith(".DS_Store"):
            continue
        for filename in os.listdir(pather + "/" + fold):
            if filename.endswith(".DS_Store"):
                continue
            gray = cv2.imread(pather + "/%s" % fold + "/%s" % filename, 0)
            # suppress background detail, keep the digit strokes
            dilated_gray = cv2.dilate(gray, np.ones((7, 7), np.uint8))
            bg_gray = cv2.medianBlur(dilated_gray, 21)
            diff_gray = 255 - cv2.absdiff(gray, bg_gray)
            norm_gray = diff_gray.copy()
            cv2.normalize(diff_gray, norm_gray, alpha=0, beta=255,
                          norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
            _, thr_gray = cv2.threshold(norm_gray, 253, 0, cv2.THRESH_TRUNC)
            cv2.normalize(thr_gray, thr_gray, alpha=0, beta=255,
                          norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
            gray1 = cv2.resize(255 - thr_gray, (112, 112),
                               interpolation=cv2.INTER_AREA)
            # two thresholding variants: adaptive and Otsu
            gray = cv2.adaptiveThreshold(gray1, 255,
                                         cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                         cv2.THRESH_BINARY_INV, 5, 3)
            (thresh, gray2) = cv2.threshold(gray1, 128, 255,
                                            cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            img = self.image_manip(gray)
            img2 = self.image_manip(gray2)
            img = image.img_to_array(img)
            img2 = image.img_to_array(img2)
            # batch of 1, 112x112, 1 grayscale channel; float32 in [0, 1]
            test_img = img.reshape((1, 112, 112, 1)).astype('float32') / 255
            test_img2 = img2.reshape((1, 112, 112, 1)).astype('float32') / 255
            prediction = self.model.predict(test_img)
            prediction2 = self.model.predict(test_img2)
            x1 = np.argmax(prediction)
            x2 = np.argmax(prediction2)
            # c/v: top confidence of each variant; b/n: each variant's
            # confidence at the other's top index
            c = prediction[0][x1]
            v = prediction2[0][x2]
            b = prediction[0][x2]
            n = prediction2[0][x1]
            k = (c + b) / 2
            j = (v + n) / 2
            if k > j:
                classname = x1
            else:
                classname = x2
            print(filename + " prediction: ", classname)
            # This list keeps count of how many images are being classified
            every.append(filename)
            # Benchmarking aid for development and larger demo sets: it
            # requires images to be labeled (class as filename prefix) to
            # tally right/wrong predictions. OPTIONAL - unlabeled images
            # still get their prediction printed next to the filename.
            if filename.startswith(str(classname)):
                corrects.append(filename)
            else:
                wrongs.append(filename)
    print("all numbers")
    print(len(every))
    print("right guess")
    print(len(corrects))
    print("wrong guess")
    print(len(wrongs))
    print(wrongs)
    print("Running: " + "ALnet-2.0")
def DetectColorGrids(Color_checker_image):
    img_gray = cv.cvtColor(Color_checker_image, cv.COLOR_BGR2GRAY)
    img_denoise = cv.fastNlMeansDenoising(img_gray, None, 10, 7, 21)
    img_threshold = cv.adaptiveThreshold(img_denoise, 255,
                                         cv.ADAPTIVE_THRESH_MEAN_C,
                                         cv.THRESH_BINARY, 21, 3)
    kernel = np.ones((3, 3), np.uint8)
    img_eroded = cv.erode(img_threshold, kernel)
    small_grid_size_range = GetSmallGridSizeRange(Color_checker_image)
    contours, _hierarchy = cv.findContours(img_eroded, cv.RETR_TREE,
                                           cv.CHAIN_APPROX_NONE)
    color_grid_contours = FilterColorGrid(contours, small_grid_size_range)
    centroid_positions = FindContourCenter(color_grid_contours)
    centroids_no_overlap = FilterOutOverlapPoint(color_grid_contours,
                                                 centroid_positions)
    # sort by y coordinate, small to big
    centroids_no_overlap = sorted(centroids_no_overlap,
                                  key=lambda centroid: centroid[1])
    rad_top = math.atan(
        (centroids_no_overlap[1][1] - centroids_no_overlap[0][1])
        / (centroids_no_overlap[1][0] - centroids_no_overlap[0][0]))
    angle_top = rad_top * 180 / PI
    size = len(centroids_no_overlap)
    rad_bottom = math.atan(
        (centroids_no_overlap[size - 1][1] - centroids_no_overlap[size - 2][1])
        / (centroids_no_overlap[size - 1][0] - centroids_no_overlap[size - 2][0]))
    angle_bottom = rad_bottom * 180 / PI
    angle_subtract = abs(angle_top - angle_bottom)
    if angle_subtract >= ANGLE_SUBTRACT_MAX_ENDURANCE:
        # top and bottom rows disagree too much to trust the angle estimate:
        # fall back to an axis-aligned layout between the extreme centroids
        orientation_left_top = list(centroids_no_overlap[0])
        orientation_right_bottom = [0.0, 0.0]
        for centroid in centroids_no_overlap:
            if centroid[0] < orientation_left_top[0]:
                orientation_left_top[0] = centroid[0]
            if centroid[1] < orientation_left_top[1]:
                orientation_left_top[1] = centroid[1]
            if centroid[0] > orientation_right_bottom[0]:
                orientation_right_bottom[0] = centroid[0]
            if centroid[1] > orientation_right_bottom[1]:
                orientation_right_bottom[1] = centroid[1]
        translation = [
            (orientation_right_bottom[0] - orientation_left_top[0]) / (TOTAL_COLUMNS - 1),
            (orientation_right_bottom[1] - orientation_left_top[1]) / (TOTAL_ROWS - 1),
        ]
        grids_position = np.zeros((TOTAL_ROWS, TOTAL_COLUMNS, 2), dtype=int)
        for row_count in range(TOTAL_ROWS):
            for col_count in range(TOTAL_COLUMNS):
                grids_position[row_count][col_count][0] = \
                    orientation_left_top[0] + translation[0] * col_count
                grids_position[row_count][col_count][1] = \
                    orientation_left_top[1] + translation[1] * row_count
        return grids_position
    else:
        # the two angles agree: rotate the image by their average so the
        # checker is axis-aligned, lay out the grid, then rotate the grid
        # points back
        angle_avg = (angle_top + angle_bottom) / 2
        rows, cols = img_gray.shape
        center = (cols / 2, rows / 2)
        rotation_mat = cv.getRotationMatrix2D(center, angle_avg, 1.0)
        img_rotation = cv.warpAffine(img_eroded, rotation_mat, (cols, rows))
        contours, _hierarchy = cv.findContours(img_rotation, cv.RETR_TREE,
                                               cv.CHAIN_APPROX_NONE)
        color_grid_contours = FilterColorGrid(contours, small_grid_size_range)
        centroid_positions = FindContourCenter(color_grid_contours)
        orientation_left_top = list(centroid_positions[0])
        orientation_right_bottom = [0.0, 0.0]
        for centroid in centroid_positions:
            if centroid[0] < orientation_left_top[0]:
                orientation_left_top[0] = centroid[0]
            if centroid[1] < orientation_left_top[1]:
                orientation_left_top[1] = centroid[1]
            if centroid[0] > orientation_right_bottom[0]:
                orientation_right_bottom[0] = centroid[0]
            if centroid[1] > orientation_right_bottom[1]:
                orientation_right_bottom[1] = centroid[1]
        translation = [
            (orientation_right_bottom[0] - orientation_left_top[0]) / (TOTAL_COLUMNS - 1),
            (orientation_right_bottom[1] - orientation_left_top[1]) / (TOTAL_ROWS - 1),
        ]
        grid_coordinate = np.zeros((TOTAL_ROWS, TOTAL_COLUMNS, 2), dtype=int)
        for row_count in range(TOTAL_ROWS):
            for col_count in range(TOTAL_COLUMNS):
                grid_coordinate[row_count][col_count][0] = \
                    orientation_left_top[0] + translation[0] * col_count
                grid_coordinate[row_count][col_count][1] = \
                    orientation_left_top[1] + translation[1] * row_count
        grids_position = np.zeros((TOTAL_ROWS, TOTAL_COLUMNS, 2), dtype=int)
        for row_count in range(TOTAL_ROWS):
            for col_count in range(TOTAL_COLUMNS):
                temp_coordinate = GetRotationPoint(
                    (grid_coordinate[row_count][col_count][0],
                     grid_coordinate[row_count][col_count][1]),
                    cols, rows, -1 * angle_avg)
                grids_position[row_count][col_count][0] = temp_coordinate[0]
                grids_position[row_count][col_count][1] = temp_coordinate[1]
        return grids_position
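# GetRotationPoint() is not shown in this excerpt. A sketch consistent with
# how it is called above (rotating a point about the image center using the
# same convention as cv.getRotationMatrix2D); treat this as an assumption,
# not the original implementation:
import math

def GetRotationPoint(point, cols, rows, angle_deg):
    cx, cy = cols / 2, rows / 2
    rad = math.radians(angle_deg)
    x, y = point[0] - cx, point[1] - cy
    # in image coordinates (y pointing down), a positive OpenCV angle
    # rotates counter-clockwise, which gives the sign pattern below
    new_x = x * math.cos(rad) + y * math.sin(rad) + cx
    new_y = -x * math.sin(rad) + y * math.cos(rad) + cy
    return [int(new_x), int(new_y)]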
from cv2 import cv2

'''
Try out all types of binarization on images in practice. Two sample images
were picked for each type to illustrate how the results of the methods differ.
'''

"""For roads THRESH_TOZERO works better: the picture is monotonous, and the
result is sharper when the threshold is tuned by hand"""
img = cv2.imread('Doroga1.png', cv2.IMREAD_GRAYSCALE)
treshold, img1 = cv2.threshold(img, thresh=200, maxval=255,
                               type=cv2.THRESH_TOZERO)
treshold, img2 = cv2.threshold(img, 200, 255, cv2.THRESH_OTSU)
img3 = cv2.adaptiveThreshold(img, maxValue=255,
                             adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
                             thresholdType=cv2.THRESH_BINARY,
                             blockSize=11, C=-4)

img_2 = cv2.imread('Doroga2.png', cv2.IMREAD_GRAYSCALE)
treshold, img4 = cv2.threshold(img_2, 200, 255, cv2.THRESH_TOZERO)
treshold, img5 = cv2.threshold(img_2, 200, 255, cv2.THRESH_OTSU)
img6 = cv2.adaptiveThreshold(img_2, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                             cv2.THRESH_BINARY, 3, -4)

"""For scans both the adaptive method and THRESH_OTSU work well, since a
document scan is not monotonous"""
# img = cv2.imread('Scan1.png', cv2.IMREAD_GRAYSCALE)
# treshold, img1 = cv2.threshold(img, 128, 255, cv2.THRESH_TOZERO)
# treshold, img2 = cv2.threshold(img, 128, 255, cv2.THRESH_OTSU)
# img3 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 31, 30)
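# Worth noting: with THRESH_OTSU the thresh argument (200 above) is ignored;
# Otsu computes its own global threshold and returns it as the first value.
# A quick check on the same image:
otsu_t, _ = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
print("threshold Otsu actually used:", otsu_t)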
# from googletrans import Translator
# translator = Translator()
# hindi_sent = "नमस्कार, धृति!"
# translation = translator.translate(hindi_sent, dest="en")
# translated_sentence = (f"{translation.text}")
# print(translated_sentence)

from cv2 import cv2
import numpy as np
import pytesseract
# from google.colab import cv2_imshow

img = cv2.imread('test2.png')
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
reduced_noise = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                      cv2.THRESH_BINARY, 11, 12)
# cv2.imshow("", reduced_noise)
# cv2.waitKey(0)
print("\nImage is loaded successfully")
text = pytesseract.image_to_string(reduced_noise, lang='eng')
print("The text is :\n", text)
def decide(filename):
    # read image as grayscale
    gray = cv2.imread(f"/content/mnistcnn/digits/{filename}", 0)
    # dilate the image
    dilated_gray = cv2.dilate(gray, np.ones((7, 7), np.uint8))
    # median blur the dilated image to further suppress detail
    bg_gray = cv2.medianBlur(dilated_gray, 21)
    # difference between gray (original image) and bg_gray: identical pixels
    # come out black (difference near 0), the digit details come out white
    # (large difference).
    diff_gray = 255 - cv2.absdiff(gray, bg_gray)
    # normalize the image so we use the full dynamic range
    norm_gray = diff_gray.copy()
    cv2.normalize(diff_gray, norm_gray, alpha=0, beta=255,
                  norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
    # the image is still gray in some areas, so truncate that away and
    # re-normalize
    _, thr_gray = cv2.threshold(norm_gray, 253, 0, cv2.THRESH_TRUNC)
    cv2.normalize(thr_gray, thr_gray, alpha=0, beta=255,
                  norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
    # invert colors and resize to 112x112
    gray1 = cv2.resize(255 - thr_gray, (112, 112),
                       interpolation=cv2.INTER_AREA)
    # gray and gray2 are two ways of thresholding; ALnet-2.0 predicts on
    # both and combines the two to get the resulting classification.
    gray = cv2.adaptiveThreshold(gray1, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY_INV, 7, 3)
    (thresh, gray2) = cv2.threshold(gray1, 128, 255,
                                    cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # convert PIL images to numpy arrays
    img = image.img_to_array(gray)
    img2 = image.img_to_array(gray2)
    # batch of 1, 112x112, 1 grayscale channel; float32 scaled to [0, 1]
    test_img = img.reshape((1, 112, 112, 1)).astype('float32') / 255
    test_img2 = img2.reshape((1, 112, 112, 1)).astype('float32') / 255
    # network output (10 nodes):
    # prediction_list is the prediction for gray,
    # prediction2_list is the prediction for gray2
    prediction_list = model.predict(test_img)
    prediction2_list = model.predict(test_img2)
    return prediction_list, prediction2_list, test_img, test_img2
from cv2 import cv2 as cv
import numpy as np

img = cv.imread('cafe.jpg', 0)
img2 = cv.imread('olho.PNG', 0)

metodo = cv.THRESH_BINARY_INV  # object of interest in white
# metodo = cv.THRESH_BINARY    # object of interest in black

# pixels past the threshold are treated as the object of interest
ret, imgBin = cv.threshold(img, 200, 255, metodo)

img_gaussian = cv.medianBlur(img2, 7)

# a different threshold value for each region of the image;
# gives better results where the contrast varies
mean = cv.adaptiveThreshold(img_gaussian, 255, cv.ADAPTIVE_THRESH_MEAN_C,
                            metodo, 11, 5)
gaussian = cv.adaptiveThreshold(img_gaussian, 255,
                                cv.ADAPTIVE_THRESH_GAUSSIAN_C, metodo, 11, 2)

cv.imshow('original', img)
cv.imshow('imgBin', imgBin)
cv.imshow('mean', mean)
cv.imshow('gaussian', gaussian)
cv.waitKey(0)
cv.destroyAllWindows()
from cv2 import cv2 as cv

def rescaleFrame(frame, scale=0.75):
    width = int(frame.shape[1] * scale)
    height = int(frame.shape[0] * scale)
    dimensions = (width, height)
    return cv.resize(frame, dimensions, interpolation=cv.INTER_AREA)

# getting image
init = cv.imread('imagen/wppstalkerlarge.jpg')
img = rescaleFrame(init, 0.4)
cv.imshow('stalker', img)

gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
cv.imshow('GRay', gray)

# Simple Thresholding
threshold, thresh = cv.threshold(gray, 100, 255, cv.THRESH_BINARY)
cv.imshow('threshImaged', thresh)
threshold, threshInv = cv.threshold(gray, 100, 255, cv.THRESH_BINARY_INV)
cv.imshow('threshImagedInverse', threshInv)

# Adaptive Thresholding
adaptiveThresh = cv.adaptiveThreshold(gray, 255, cv.ADAPTIVE_THRESH_MEAN_C,
                                      cv.THRESH_BINARY, 11, 5)
cv.imshow('adaptive thresholding', adaptiveThresh)
cv.waitKey(0)
def videoCap():
    # pytesseract.pytesseract.tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract"
    cap = cv2.VideoCapture(
        'C:/Users/jeawa/Desktop/project/LPR_Project/project/video/test_car.mp4'
    )
    count = 0
    before_chars = ""
    while True:
        el = cv2.getTickCount()
        fps = cap.get(5)
        ret, img_ori = cap.read()
        height, width, channel = img_ori.shape  # get height, width, channels
        gray = cv2.cvtColor(img_ori, cv2.COLOR_BGR2GRAY)
        # img_ori = cv2.imread('LPR_Project/project/image/3.jpg')  # load image
        # gray = cv2.cvtColor(img_ori, cv2.COLOR_BGR2GRAY)
        img_blurred = cv2.GaussianBlur(gray, ksize=(3, 3), sigmaX=0)  # remove noise
        img_thresh = cv2.adaptiveThreshold(
            img_blurred,
            maxValue=255.0,
            adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            thresholdType=cv2.THRESH_BINARY_INV,
            blockSize=19,  # 12 worked; also tried 15, 20
            C=9)
        contours, _ = cv2.findContours(  # find contours
            img_thresh,
            mode=cv2.RETR_LIST,
            method=cv2.CHAIN_APPROX_TC89_KCOS)
        temp_result = np.zeros((height, width, channel), dtype=np.uint8)
        cv2.drawContours(
            temp_result,        # canvas image
            contours=contours,  # contour info
            contourIdx=-1,      # -1: draw all contours
            color=(255, 255, 255),
            thickness=1)
        contours_dict = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)  # bounding box of the contour
            # cv2.rectangle(temp_result, pt1=(x, y), pt2=(x + w, y + h),
            #               color=(255, 255, 255), thickness=1)
            contours_dict.append({
                'contour': contour,
                'x': x,
                'y': y,
                'w': w,
                'h': h,
                'cx': x + (w / 2),  # center coordinates
                'cy': y + (h / 2)
            })
        MIN_AREA, MAX_AREA = 100, 1000   # min/max area of the boundingRect
        MIN_WIDTH, MIN_HEIGHT = 2, 8     # min width and height of the boundingRect
        MIN_RATIO, MAX_RATIO = 0.3, 0.9  # aspect-ratio bounds of the boundingRect
        possible_contours = []           # rectangles satisfying the conditions above
        cnt = 0
        for d in contours_dict:
            area = d['w'] * d['h']   # width * height = area
            ratio = d['w'] / d['h']  # width / height = ratio
            if MIN_AREA < area < MAX_AREA and d['w'] > MIN_WIDTH \
                    and d['h'] > MIN_HEIGHT and MIN_RATIO < ratio < MAX_RATIO:
                d['idx'] = cnt  # store the index of a matching candidate
                cnt += 1
                possible_contours.append(d)  # update possible_contours
        # temp_result = np.zeros((height, width, channel), dtype=np.uint8)
        for d in possible_contours:
            # cv2.drawContours(temp_result, d['contour'], -1, (255, 255, 255))
            cv2.rectangle(temp_result,
                          pt1=(d['x'], d['y']),
                          pt2=(d['x'] + d['w'], d['y'] + d['h']),
                          color=(0, 0, 255),
                          thickness=1)

        MAX_DIAG_MULTIPLYER = 4  # centers must lie within this many diagonal lengths
        MAX_ANGLE_DIFF = 12.0    # max angle between centers (tried 12, 0.7, 0.5, 0.6, 3)
        MAX_AREA_DIFF = 0.3      # max area difference (tried 0.5)
        MAX_WIDTH_DIFF = 0.5     # max width difference
        MAX_HEIGHT_DIFF = 0.6    # max height difference
        MIN_N_MATCHED = 4        # drop groups with fewer members than this (tried 3)

        def find_chars(contour_list):
            matched_result_idx = []  # store idx values
            for d1 in contour_list:
                matched_contours_idx = []
                for d2 in contour_list:
                    if d1['idx'] == d2['idx']:  # skip when d1 and d2 are the same box
                        continue
                    dx = abs(d1['cx'] - d2['cx'])
                    dy = abs(d1['cy'] - d2['cy'])
                    diagonal_length1 = np.sqrt(d1['w'] ** 2 + d1['h'] ** 2)
                    # distance between the two centers
                    distance = np.linalg.norm(
                        np.array([d1['cx'], d1['cy']]) -
                        np.array([d2['cx'], d2['cy']]))
                    if dx == 0:
                        angle_diff = 90  # angle is 90 degrees when dx == 0 (edge case)
                    else:
                        angle_diff = np.degrees(np.arctan(dy / dx))  # compute theta
                    area_diff = abs(d1['w'] * d1['h'] - d2['w'] * d2['h']) \
                        / (d1['w'] * d1['h'])                       # area ratio
                    width_diff = abs(d1['w'] - d2['w']) / d1['w']   # width ratio
                    height_diff = abs(d1['h'] - d2['h']) / d1['h']  # height ratio
                    # all grouping conditions
                    if distance < diagonal_length1 * MAX_DIAG_MULTIPLYER \
                            and angle_diff < MAX_ANGLE_DIFF \
                            and area_diff < MAX_AREA_DIFF \
                            and width_diff < MAX_WIDTH_DIFF \
                            and height_diff < MAX_HEIGHT_DIFF:
                        matched_contours_idx.append(d2['idx'])  # append this contour
                # only d2 was appended above, so finally append d1 as well
                matched_contours_idx.append(d1['idx'])
                if len(matched_contours_idx) < MIN_N_MATCHED:
                    continue  # too few members to be a plate
                matched_result_idx.append(matched_contours_idx)  # final candidates
                unmatched_contour_idx = []  # boxes not in the final candidates
                for d4 in contour_list:
                    if d4['idx'] not in matched_contours_idx:
                        unmatched_contour_idx.append(d4['idx'])
                unmatched_contour = np.take(possible_contours,
                                            unmatched_contour_idx)
                # recurse on the leftover boxes
                recursive_contour_list = find_chars(unmatched_contour)
                for idx in recursive_contour_list:
                    matched_result_idx.append(idx)
                break
            return matched_result_idx

        result_idx = find_chars(possible_contours)
        matched_result = []
        for idx_list in result_idx:
            matched_result.append(np.take(possible_contours, idx_list))

        # visualize possible contours
        temp_result = np.zeros((height, width, channel), dtype=np.uint8)
        for r in matched_result:
            for d in r:
                cv2.rectangle(temp_result,
                              pt1=(d['x'], d['y']),
                              pt2=(d['x'] + d['w'], d['y'] + d['h']),
                              color=(0, 0, 255),
                              thickness=1)

        PLATE_WIDTH_PADDING = 1.2   # tried 1.3
        PLATE_HEIGHT_PADDING = 1.5
        MIN_PLATE_RATIO = 3
        MAX_PLATE_RATIO = 7         # tried 10
        plate_imgs = []
        plate_infos = []
        for i, matched_chars in enumerate(matched_result):
            # sort the boxes left to right
            sorted_chars = sorted(matched_chars, key=lambda x: x['cx'])
            # center coordinates of the plate
            plate_cx = (sorted_chars[0]['cx'] + sorted_chars[-1]['cx']) / 2
            plate_cy = (sorted_chars[0]['cy'] + sorted_chars[-1]['cy']) / 2
            # plate width
            plate_width = (sorted_chars[-1]['x'] + sorted_chars[-1]['w']
                           - sorted_chars[0]['x']) * PLATE_WIDTH_PADDING
            # plate height
            # sum_height = 0
            # for d in sorted_chars:
            #     sum_height += d['h']
            # plate_height = int(sum_height / len(sorted_chars) * PLATE_HEIGHT_PADDING)
            plate_height = (sorted_chars[-1]['y'] - sorted_chars[0]['y']
                            + sorted_chars[-1]['h']) * PLATE_HEIGHT_PADDING
            # plate angle from the first and last characters
            triangle_height = sorted_chars[-1]['cy'] - sorted_chars[0]['cy']
            triangle_hypotenus = np.linalg.norm(
                np.array([sorted_chars[0]['cx'], sorted_chars[0]['cy']]) -
                np.array([sorted_chars[-1]['cx'], sorted_chars[-1]['cy']]))
            angle = np.degrees(np.arcsin(triangle_height / triangle_hypotenus))
            rotation_matrix = cv2.getRotationMatrix2D(
                center=(plate_cx, plate_cy), angle=angle, scale=1.0)  # rotate
            img_rotated = cv2.warpAffine(img_thresh, M=rotation_matrix,
                                         dsize=(width, height))
            img_cropped = cv2.getRectSubPix(
                img_rotated,
                patchSize=(int(plate_width), int(plate_height)),
                center=(int(plate_cx), int(plate_cy)))
            # reject crops whose aspect ratio is outside the plate range
            plate_ratio = img_cropped.shape[1] / img_cropped.shape[0]
            if plate_ratio < MIN_PLATE_RATIO or plate_ratio > MAX_PLATE_RATIO:
                continue
            plate_imgs.append(img_cropped)
            plate_infos.append({
                'x': int(plate_cx - plate_width / 2),
                'y': int(plate_cy - plate_height / 2),
                'w': int(plate_width),
                'h': int(plate_height)
            })
        for i, plate_img in enumerate(plate_imgs):
            plate_img = cv2.resize(plate_img, dsize=(0, 0), fx=1.6, fy=1.6)
            _, plate_img = cv2.threshold(plate_img, thresh=0.0, maxval=255.0,
                                         type=cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            # plt.imshow(plate_img, cmap='gray')
            # plt.show()
            # find contours again (same as above)
            contours, _ = cv2.findContours(plate_img, mode=cv2.RETR_LIST,
                                           method=cv2.CHAIN_APPROX_SIMPLE)
            plate_min_x, plate_min_y = plate_img.shape[1], plate_img.shape[0]
            plate_max_x, plate_max_y = 0, 0
            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour)
                area = w * h
                ratio = w / h
                if area > MIN_AREA \
                        and w > MIN_WIDTH and h > MIN_HEIGHT \
                        and MIN_RATIO < ratio < MAX_RATIO:
                    if x < plate_min_x:
                        plate_min_x = x
                    if y < plate_min_y:
                        plate_min_y = y
                    if x + w > plate_max_x:
                        plate_max_x = x + w
                    if y + h > plate_max_y:
                        plate_max_y = y + h
            img_result = plate_img[plate_min_y:plate_max_y,
                                   plate_min_x:plate_max_x]
            img_result = cv2.GaussianBlur(img_result, ksize=(3, 3), sigmaX=0)
            _, img_result = cv2.threshold(img_result, thresh=0.0, maxval=255.0,
                                          type=cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            img_result = cv2.copyMakeBorder(img_result, top=10, bottom=10,
                                            left=20, right=10,
                                            borderType=cv2.BORDER_CONSTANT,
                                            value=(0, 0, 0))
            chars = pytesseract.image_to_string(img_result, lang='kor',
                                                config='--psm 7 --oem 0')
            result_chars = ''
            has_digit = False
            # keep digits and the Hangul syllables that appear on Korean plates
            PLATE_HANGUL = '가나다라마거너더러머버서어저고노도로모보소오조구누두루무부수우주아바사자배하허호'
            for c in chars:
                if c in PLATE_HANGUL or c.isdigit():
                    if c.isdigit():
                        has_digit = True
                    result_chars += c
            # print(result_chars)
            # print(len(result_chars))
            if result_chars == "":
                pass
            else:
                # require the same plausible plate string on two consecutive hits
                if before_chars == result_chars and 6 < len(result_chars) < 9:
                    count += 1
                else:
                    before_chars = result_chars
                    count = 0
                if count == 1:
                    print(result_chars)
                    count = 0
                    before_chars = ""
        # print(fps, ": frame rate")
        print(fps)
        a = np.hstack((img_ori, temp_result))
        cv2.imshow("Go", a)
        if cv2.waitKey(42) == ord('q'):
            break
        e2 = cv2.getTickCount()
        time = (e2 - el) / cv2.getTickFrequency()
        print(time)
        # print("processing time for this frame: " + str(time) + " s")
    cap.release()
    cv2.destroyAllWindows()
from cv2 import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('./images/sudoku.png', 0)
img = cv2.medianBlur(img, 5)

ret, th1 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
th2 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                            cv2.THRESH_BINARY, 11, 2)
th3 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                            cv2.THRESH_BINARY, 11, 2)

titles = [
    'Original Image', 'Global Thresholding (v = 127)',
    'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding'
]
images = [img, th1, th2, th3]
for i in range(4):
    plt.subplot(2, 2, i + 1), plt.imshow(images[i], 'gray')
    plt.title(titles[i])
    plt.xticks([]), plt.yticks([])
plt.show()
def capture_mrz(window: sg.Window,
                camera_id: int) -> Tuple[List[str], Image.Image]:
    """
    Capture the MRZ by using OCR and the camera footage.

    :returns: the parsed MRZ lines and the frame they were read from
    """
    cap = cv2.VideoCapture(camera_id)
    tess_api = PyTessBaseAPI(init=False, psm=PSM.SINGLE_BLOCK_VERT_TEXT)
    tess_api.InitFull(
        # https://github.com/DoubangoTelecom/ultimateMRZ-SDK/tree/master/assets/models
        path="text_detection",
        lang="mrz",
        variables={
            "load_system_dawg": "false",
            "load_freq_dawg": "false",
            "tessedit_char_whitelist": "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<",
        },
    )
    # mrz_list: List[List[str]] = []
    pool = ThreadPool(processes=1)
    ocr_running = False
    while True:
        _, frame = cap.read()
        mrz = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        mrz = cv2.adaptiveThreshold(mrz, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                    cv2.THRESH_BINARY, 21, 10)
        # mrz = cv2.adaptiveThreshold(mrz, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 3, 2)
        # mrz = cv2.GaussianBlur(mrz, (5, 5), 0)
        # _, mrz = cv2.threshold(mrz, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # mrz = cv2.GaussianBlur(mrz, (5, 5), 0)
        # mrz = cv2.medianBlur(mrz, 3)
        frame_shown = copy.deepcopy(mrz)
        width = 320
        height = int(frame_shown.shape[0] * (320 / frame_shown.shape[1]))
        frame_shown = cv2.resize(frame_shown, (width, height))
        alpha = 0.8
        frame_overlay = add_mrz_overlay(copy.deepcopy(frame_shown),
                                        "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<",
                                        3, 0.9, False)
        frame_overlay = add_mrz_overlay(
            frame_overlay,
            "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<",
            2, 0.9, True)
        cv2.addWeighted(frame_shown, alpha, frame_overlay, 1 - alpha, 0,
                        frame_shown)
        imgbytes = cv2.imencode(".png", frame_shown)[1].tobytes()
        window.write_event_value("-SHOW MRZ-", [imgbytes])
        mrz = Image.fromarray(mrz)
        if not ocr_running:
            checked_frame = Image.fromarray(frame[:, :, ::-1])
            tess_api.SetImage(mrz)
            async_result = pool.apply_async(tess_api.GetUTF8Text)
            ocr_running = True
        if async_result.ready():
            ocr_running = False
            mrz_text = async_result.get()
            result = parse_mrz_ocr(mrz_text)
            if result is not None:
                break
        # if result and len(mrz_list) < 3:
        #     mrz_list.append(result)
        # elif not result:
        #     mrz_list = []
        # else:
        #     if all(x == mrz_list[0] for x in mrz_list):
        #         break
    # When everything is done, release the capture
    cap.release()
    # cv2.destroyAllWindows()
    tess_api.End()
    # return mrz_list[0]
    window.write_event_value("-HIDE MRZ-", "")
    return (result, checked_frame)
from cv2 import cv2
import numpy as np

img = cv2.imread('sudoku.jpg')

# NOTE: if the image is not converted to grayscale, this error occurs:
#   error: (-215:Assertion failed) src.type() == CV_8UC1 in function 'cv::adaptiveThreshold'
# adaptiveThreshold only works on grayscale images, so convert first. The
# documentation reads the image directly in grayscale with
# img = cv2.imread('dave.jpg', 0); converting with cvtColor works too.
img_grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# SYNTAX: adaptiveThreshold(src, maxValue, adaptiveMethod, thresholdType, blockSize, C)
# NOTE: the larger the value of C, the whiter the image becomes
adap_img = cv2.adaptiveThreshold(img_grey, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 11, 1)   # MEAN
adap_img1 = cv2.adaptiveThreshold(img_grey, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY, 11, 2)  # GAUSSIAN

cv2.imshow('Original Image', img)
cv2.imshow('Adaptive_Mean Image', adap_img)
cv2.imshow('Adaptive_Gaussian Image', adap_img1)
if cv2.waitKey(0) == 27:
    cv2.destroyAllWindows()
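# A quick way to see the NOTE about C in action: sweep C and watch the
# output grow whiter, since the per-pixel threshold is (local mean - C) and
# a larger C means more pixels clear it.
for C in (-2, 1, 5, 10):
    demo = cv2.adaptiveThreshold(img_grey, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 11, C)
    cv2.imshow("C = {}".format(C), demo)
cv2.waitKey(0)
cv2.destroyAllWindows()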
def predict_chosen(self):
    print("")
    # select the image to predict
    filename = askopenfilename()
    # read image as grayscale
    gray = cv2.imread(filename, 0)
    # dilate the image
    dilated_gray = cv2.dilate(gray, np.ones((7, 7), np.uint8))
    # median blur the dilated image to further suppress detail
    bg_gray = cv2.medianBlur(dilated_gray, 21)
    # difference between gray (original image) and bg_gray: identical pixels
    # come out black (difference near 0), the digit details come out white
    # (large difference).
    diff_gray = 255 - cv2.absdiff(gray, bg_gray)
    # normalize the image so we use the full dynamic range
    norm_gray = diff_gray.copy()
    cv2.normalize(diff_gray, norm_gray, alpha=0, beta=255,
                  norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
    # the image is still gray in some areas, so truncate that away and
    # re-normalize
    _, thr_gray = cv2.threshold(norm_gray, 253, 0, cv2.THRESH_TRUNC)
    cv2.normalize(thr_gray, thr_gray, alpha=0, beta=255,
                  norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
    # invert colors and resize to 112x112
    gray1 = cv2.resize(255 - thr_gray, (112, 112),
                       interpolation=cv2.INTER_AREA)
    # gray and gray2 are two ways of thresholding; Alnet-2.0 predicts on
    # both and combines the two to get the resulting classification.
    gray = cv2.adaptiveThreshold(gray1, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY_INV, 5, 3)
    (thresh, gray2) = cv2.threshold(gray1, 128, 255,
                                    cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img = self.image_manip(gray)
    img2 = self.image_manip(gray2)
    # convert PIL images to numpy arrays
    img = image.img_to_array(img)
    img2 = image.img_to_array(img2)
    # batch of 1, 112x112, 1 grayscale channel; float32 scaled to [0, 1]
    test_img = img.reshape((1, 112, 112, 1)).astype('float32') / 255
    test_img2 = img2.reshape((1, 112, 112, 1)).astype('float32') / 255
    # network output (10 nodes):
    # prediction_list is the prediction for gray,
    # prediction2_list is the prediction for gray2
    prediction_list = self.model.predict(test_img)
    prediction2_list = self.model.predict(test_img2)
    # x1 and x2 are the indexes (i.e. the guessed digits) of the highest
    # values for gray and gray2
    x1 = np.argmax(prediction_list)
    x2 = np.argmax(prediction2_list)
    # c is the highest value in prediction_list
    # v is the highest value in prediction2_list
    c = prediction_list[0][x1]
    v = prediction2_list[0][x2]
    # b is the value of prediction_list at the index of prediction2_list's highest value
    # n is the value of prediction2_list at the index of prediction_list's highest value
    b = prediction_list[0][x2]
    n = prediction2_list[0][x1]
    # comparing the averaged values below tells us which prediction is most
    # likely to be correct.
    k = (c + b) / 2
    j = (v + n) / 2
    if k > j:
        classname = x1
    else:
        classname = x2
    # reshape again for showing the images to the user
    test_img = test_img.reshape((112, 112))
    test_img2 = test_img2.reshape((112, 112))
    # the block below displays gray and gray2, the values of k and j, and
    # the estimated classname for the chosen image
    fig = plt.figure(figsize=(8, 8))
    columns = 2
    rows = 1
    plt.gray()
    fig.add_subplot(rows, columns, 1).set_title(
        "adaptive_Threshold\n k = {}".format(k))
    plt.imshow(test_img)
    fig.add_subplot(rows, columns, 2).set_title(
        "Threshold\n j = {}".format(j))
    plt.imshow(test_img2)
    if k > j:
        fig.suptitle("Image shows a {}\n k > j".format(classname))
    else:
        fig.suptitle("Image shows a {}\n j >= k".format(classname))
    plt.show()
    print("\nadaptive_Threshold prediction:\n", prediction_list, "\n")
    print("Threshold prediction:\n", prediction2_list)
    print("\nRunning: " + "Alnet-2.0.")
# cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
ret, original_frame = cap.read()
out.write(original_frame)  # we will use this later

gray = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
# Blur away the unwanted detail
blur = cv2.GaussianBlur(gray, (9, 9), 0)

# Adaptive threshold to obtain a binary image from the frame, followed by a
# probabilistic Hough lines transform to highlight straight-line proposals
# https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_thresholding/py_thresholding.html
adaptive = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY, 11, 2)
processed = cv2.bitwise_not(adaptive, adaptive)
# processed_not_dilated = processed.copy()

# Dilate the image to increase the size of the grid lines.
kernel = np.array([[0., 1., 0.], [1., 1., 1.], [0., 1., 0.]], dtype=np.uint8)
processed = cv2.dilate(processed, kernel)

# Find the sudoku contour proposals. Contours are all possible points; we
# will locate the grid using them.
contours, hier = cv2.findContours(processed.copy(), cv2.RETR_EXTERNAL,
                                  cv2.CHAIN_APPROX_SIMPLE)
external_contours = cv2.drawContours(processed.copy(), contours, -1,
                                     (255, 0, 0), 2)
current_time = (time.time() - starttime)
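# A plausible next step (not part of this excerpt): treat the largest
# external contour as the sudoku grid candidate and approximate it to four
# corners. The names here are illustrative only.
if contours:
    grid_candidate = max(contours, key=cv2.contourArea)
    approx = cv2.approxPolyDP(grid_candidate,
                              0.02 * cv2.arcLength(grid_candidate, True), True)
    if len(approx) == 4:
        print("found a four-corner grid candidate")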
def prepareData(self, cvl=False, iam=False, cvl_iam=False):
    input_len = []
    text_list = []
    original_text = []
    label_len = []
    max_len = 0
    image = []
    if iam:
        print('LOADING IAM DATASET')
        image_name_path, text, len_text = self.read_data()
    elif cvl:
        print('LOADING CVL DATASET')
        path = r'D:\cvl-database-1-1\testset\words'
        image_name_path = []
        text = []
        len_text = []
        for root, dirs, files in os.walk(path):
            for image_name in files:
                image_name_path.append(os.path.join(root, image_name))
                val = image_name.split('-')[-1].split('.')[0]
                text.append(val.split('.')[0])
                len_text.append(len(val))
    else:
        # IN DEVELOPMENT - DON'T CALL THIS BRANCH
        image_name_path, text, len_text = self.read_data()
        for root, dirs, files in os.walk(path):
            for image_name in files:
                image_name_path.append(os.path.join(root, image_name))
                val = image_name.split('-')[-1].split('.')[0]
                text.append(val.split('.')[0])
                len_text.append(len(val))
    for image_name, text_word, text_len in tqdm(zip(image_name_path, text,
                                                    len_text)):
        load_image = cv2.imread(image_name)
        try:
            img = cv2.cvtColor(load_image, cv2.COLOR_BGR2GRAY)
            img = cv2.adaptiveThreshold(img, 255,
                                        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                        cv2.THRESH_BINARY,
                                        blockSize=91, C=11)
        except:
            continue
        # Dilating and eroding the image can make it look much clearer
        kernal = np.ones((5, 5), np.uint8)
        # img = cv2.erode(img, kernal, 1)
        # img = cv2.dilate(img, kernal, 1)
        # Adjust the image so every sample has shape (32, 128) (h x w)
        height, width = img.shape
        if (height > 32) or (width > 128):
            img = cv2.resize(img, (128, 32), interpolation=cv2.INTER_AREA)
        else:
            if height < 32:
                add_ones = np.ones((32 - height, width)) * 255
                try:
                    img = np.concatenate((img, add_ones))
                except:
                    continue
            if width < 128:
                # use the current (possibly padded) height so both pads apply
                add_ones = np.ones((img.shape[0], 128 - width)) * 255
                try:
                    img = np.concatenate((img, add_ones), axis=1)
                except:
                    continue
        img = np.expand_dims(img, axis=2)
        # Encode text
        encode_text = self.__encode_string_to_numbers(text_word)
        # Track the longest text
        if text_len > max_len:
            max_len = text_len
        image.append(img)
        text_list.append(encode_text)
        original_text.append(text_word)
        input_len.append(len(encode_text))
        label_len.append(len(text_word))
    return image, text_list, input_len, label_len, original_text, max_len
import cv2

# capture-device setup (assumed; the original excerpt begins mid-script)
cam = cv2.VideoCapture(0)
img_counter = 0

while True:
    ret, frame = cam.read()
    if not ret:
        break
    cv2.imshow("test", frame)
    # read the key once per frame; calling waitKey twice would swallow events
    k = cv2.waitKey(1) & 0xFF
    if k == 27:     # ESC: quit
        break
    elif k == 32:   # SPACE: save a frame
        img_name = "opencv_frame_{}.png".format(img_counter)
        cv2.imwrite(img_name, frame)
        img_counter += 1

cam.release()
cv2.destroyAllWindows()

img = cv2.imread('//project//project//opencv_frame_0.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.medianBlur(gray, 5)
edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                              cv2.THRESH_BINARY, 21, 8)
color = cv2.medianBlur(img, 19)
cartoon = cv2.bitwise_and(color, color, mask=edges)
cv2.imwrite("//project//project//media//temp.png", cartoon)
print("/media/temp.png")
import numpy as np
from cv2 import cv2
import pytesseract
from PIL import Image

# Load image, grayscale, adaptive threshold
image = cv2.imread('captchas/10.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 11, 3)

# Morph open
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

# Remove noise by filtering on contour area
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 10:
        cv2.drawContours(opening, [c], -1, (0, 0, 0), -1)

# Invert image for the result
result = 255 - opening
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('result', result)
cv2.imwrite("result2.png", result)
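# pytesseract and PIL are imported above but never used in this excerpt;
# presumably the cleaned image was fed to OCR along these lines (the psm
# mode and character whitelist are assumptions):
text = pytesseract.image_to_string(
    Image.fromarray(result),
    config="--psm 8 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyz")
print(text)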
motoren = simulator

# grind the CPU:
while True:
    # read a frame
    dieser_frame = kamera.get_frame()

    ########################################
    # different threshold variants         #
    ########################################
    # convert a copy of dieser_frame to grayscale
    frame_graustufen = cv2.cvtColor(dieser_frame, cv2.COLOR_BGR2GRAY)
    _, frame_schwarz_weiss1 = cv2.threshold(frame_graustufen, 110, 255,
                                            cv2.THRESH_BINARY_INV)
    frame_schwarz_weiss2 = cv2.adaptiveThreshold(
        frame_graustufen, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV, 11, 9)
    frame_schwarz_weiss3 = cv2.adaptiveThreshold(
        frame_graustufen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 6)

    # show the camera image
    cv2.imshow("Original", dieser_frame)
    cv2.imshow("Global Threshold", frame_schwarz_weiss1)
    cv2.imshow("Adaptive Mean", frame_schwarz_weiss2)
    cv2.imshow("Adaptive Gaussian", frame_schwarz_weiss3)

    # drive the motors
    motoren.set_speed(30, 30)
    # pause with p, move with w,a,s,d, rotate with q,e, quit with k
    As = [0, 30, 60, 90, 120, 150]
    filtered_img = np.zeros(image_nor.shape, np.float32)
    for A in As:
        # cv.getGaborKernel(ksize, sigma, theta, lambd, gamma[, psi[, ktype]])
        g_kernel = cv2.getGaborKernel((27, 27), 3.4, np.pi * A / 180, 8.1,
                                      0.9, 0, ktype=cv2.CV_32F)
        _filtered_img = cv2.filter2D(image_nor, cv2.CV_8UC3, g_kernel)
        filtered_img += _filtered_img

    # normalize the filter response
    filtered_img = filtered_img / filtered_img.max() * 255
    filtered_img = filtered_img.astype(np.uint8)

    # turn the filter output into a two-dimensional, binarized code
    filtered_img = cv2.cvtColor(filtered_img, cv2.COLOR_BGR2GRAY)
    filtered_img = cv2.adaptiveThreshold(filtered_img, 255,
                                         cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                         cv2.THRESH_BINARY, 19, 1)

    # save the code or run a search
    if Baza.count(file) == True:
        cv2.imwrite("Baza/kod" + np.array2string(np.array(files.index(file)))
                    + ".bmp", filtered_img)
        print(file)
    else:
        Search(filtered_img, file)
# else branch of the enclosing iris-detection check (begins above this excerpt)
else:
    NotFound.append(file)
    # print(file + " ----> error: iris not detected")
    continue

cv2.waitKey(0)
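# Optional sanity check (not in the original): visualize each Gabor kernel
# in the bank, using the same parameters as above.
for A in As:
    k = cv2.getGaborKernel((27, 27), 3.4, np.pi * A / 180, 8.1, 0.9, 0,
                           ktype=cv2.CV_32F)
    k_vis = cv2.normalize(k, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    cv2.imshow("gabor {} deg".format(A),
               cv2.resize(k_vis, (270, 270), interpolation=cv2.INTER_NEAREST))
cv2.waitKey(0)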
    # (inside a try: block that begins above this excerpt)
    width_ = w
    height_ = h
    # or force A4 format at 300 DPI
    width_ = 1240
    height_ = 1754
    # new coordinates
    new_rect = np.array(
        [[0, 0], [width_ - 1, 0], [width_ - 1, height_ - 1], [0, height_ - 1]],
        dtype="float32")
    M = cv2.getPerspectiveTransform(rect, new_rect)
    output_gray = cv2.warpPerspective(gray, M, (width_, height_))
    output = cv2.adaptiveThreshold(output_gray, 255,
                                   cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY, 5, 2)
except Exception as e:
    print(str(e))
    sys.exit(0)

print("Print")

# MATPLOTLIB
imgs = [img, output_gray, output]
i = 1
for img in imgs:
    # the grayscale results need GRAY2RGB; only the original frame is BGR
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    else:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.subplot(1, len(imgs), i)
    plt.imshow(img)
    i += 1
im2 = imgWarpColored.copy()
im2 = unsharp_mask(im2)
cv2.imshow("b", im2)

im3 = im1.copy()
im3 = unsharp_mask(im3)
cv2.imshow("a+b", im3)

im4 = imgWarpColored.copy()
im4 = cv2.GaussianBlur(im4, (5, 5), 3)
im4 = cv2.addWeighted(im4, 1.5, im4, -0.5, 0, im4)
cv2.imshow("a+b+c", im4)

# APPLY ADAPTIVE THRESHOLD (the magic numbers 1, 1 are
# ADAPTIVE_THRESH_GAUSSIAN_C and THRESH_BINARY_INV)
imgWarpGray = cv2.cvtColor(imgWarpColored, cv2.COLOR_BGR2GRAY)
imgAdaptiveThre = cv2.adaptiveThreshold(imgWarpGray, 255, 1, 1, 7, 2)
imgAdaptiveThre = cv2.bitwise_not(imgAdaptiveThre)
imgAdaptiveThre = cv2.medianBlur(imgAdaptiveThre, 3)
cv2.imshow("Warped Image With corners after adaptive threshold1",
           imgAdaptiveThre)

imgWarpGray1 = cv2.cvtColor(im3, cv2.COLOR_BGR2GRAY)
imgAdaptiveThre1 = cv2.adaptiveThreshold(imgWarpGray1, 255, 1, 1, 7, 2)
imgAdaptiveThre1 = cv2.bitwise_not(imgAdaptiveThre1)
imgAdaptiveThre1 = cv2.medianBlur(imgAdaptiveThre1, 3)
cv2.imshow("Warped Image With corners after adaptive threshold2",
           imgAdaptiveThre1)

if cv2.waitKey(0) & 0xFF == ord('s'):
    break
import numpy as np
from cv2 import cv2

img = cv2.imread('detect_blob.png', 1)
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# Gaussian adaptive threshold with a binary threshold type
thresh_img = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 115, 1)
cv2.imshow("Adaptive Thresh Image", thresh_img)

# Contours
contours, hierarchy = cv2.findContours(thresh_img, cv2.RETR_TREE,
                                       cv2.CHAIN_APPROX_SIMPLE)
thickness = 2
color = (255, 0, 255)
objects = np.zeros([img.shape[0], img.shape[1], 3], 'uint8')
for c in contours:
    # draw the contour of one object at a time; -1 would mean all, but c
    # holds a single object's contour
    cv2.drawContours(objects, [c], -1, color, thickness)
    area = cv2.contourArea(c)           # area of one contour
    perimeter = cv2.arcLength(c, True)  # perimeter of one contour
    print("Perimeter : {}, Area: {}".format(perimeter, area))
    M = cv2.moments(c)
    if M['m00'] == 0:  # skip degenerate contours to avoid division by zero
        continue
    cx = int(M['m10'] / M['m00'])
    cy = int(M['m01'] / M['m00'])
    # draw the centroid as a small filled circle at the centroid position
    cv2.circle(objects, (cx, cy), 4, color, -1)

cv2.imshow("Centroids", objects)
cv2.waitKey(0)
cv2.destroyAllWindows()
def read_grid(image):
    sudoku = ""
    side = image.shape[0] // 9  # this is a square image; side will be ~50
    offset = 0.1 * side         # margin to skip the cell borders
    for i in range(9):
        for j in range(9):
            # coordinates of the corners of a small box
            top = max(int(round(side * i + offset)), 0)
            left = max(int(round(side * j + offset)), 0)
            right = min(int(round(side * (j + 1) - offset)), image.shape[0])
            bottom = min(int(round(side * (i + 1) - offset)), image.shape[0])
            # find the contour inside the box
            crop = image[top:bottom, left:right]
            gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (7, 7), 0)
            thresh = cv2.adaptiveThreshold(blurred, 255,
                                           cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY_INV, 11, 2)
            contours, _ = cv2.findContours(thresh, cv2.RETR_TREE,
                                           cv2.CHAIN_APPROX_SIMPLE)
            # find the largest valid contour
            max_area = 1
            largest_contour = None
            for cnt in contours:
                area = cv2.contourArea(cnt)
                if area > side * side * 0.04 and area > max_area:
                    max_area = area
                    largest_contour = cnt
            # if there is no contour, the box must be empty
            if largest_contour is None:
                sudoku += "0"
                continue
            # crop out the largest contour, i.e. the digit
            rect = cv2.minAreaRect(largest_contour)
            box = cv2.boxPoints(rect)
            box = np.intp(box)
            minx = max(min(box, key=lambda g: g[0])[0] - 3, 0)
            miny = max(min(box, key=lambda g: g[1])[1] - 3, 0)
            maxx = min(max(box, key=lambda g: g[0])[0] + 3, int(side))
            maxy = min(max(box, key=lambda g: g[1])[1] + 3, int(side))
            # grayscale image of only the number
            number_image = gray[miny:maxy, minx:maxx]
            # OCR
            custom_config = r' --psm 6 -c tessedit_char_whitelist=123456789'
            text = pt.image_to_string(number_image, config=custom_config)
            # if i == 7 and j == 3:
            #     cv2.imshow("cropped", number_image)
            #     print(text)
            try:
                num = int(text)
                sudoku += str(num)
            except ValueError:
                # a big contour that is not a digit means this frame is
                # invalid; ignore it
                return None
    return sudoku
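# Hedged usage sketch for read_grid(); "warped_grid.png" is a placeholder
# for a square, top-down BGR crop of the sudoku grid from an earlier warp
# step (e.g. 450x450 so each cell is ~50 px).
import cv2

warped = cv2.imread("warped_grid.png")
grid_string = read_grid(warped)
if grid_string is not None:
    for r in range(9):
        print(grid_string[r * 9:(r + 1) * 9])  # print as 9 rows of 9 digits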