Code Example #1
def get_grid_img(frame):
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    image = cv2.GaussianBlur(image, (11, 11), 0)
    image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 4)  # the two ADAPTIVE_THRESH_* constants are mutually exclusive methods, not combinable bit flags
    # cv2.imshow("original", image)
    # display_image(image)

    contours = get_sorted_contours(image)
    contour, corners = get_grid_contour(contours)
    if contour is None:
        # bail out before boundingRect, which would fail on None
        return None, None

    x, y, width, height = cv2.boundingRect(contour)
    average_grid_wh = int((width + height) / 2)

    # show outline of selected grid on full image
    # grid_outline = draw_grid_outline(frame.copy(), corners)
    # cv2.imshow("grid_outline", grid_outline)
    warped_grid, transform_matrix_inv = warp(image, corners, average_grid_wh)
    
    return warped_grid, transform_matrix_inv
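get_sorted_contours, get_grid_contour, and warp are project helpers that are not shown. A minimal sketch of what they might look like, assuming the grid is the largest contour that approximates to four corners (the names, corner ordering, and behavior are assumptions, not the project's actual code):

import cv2
import numpy as np

def get_sorted_contours(image):
    # largest contours first
    contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return sorted(contours, key=cv2.contourArea, reverse=True)

def get_grid_contour(contours):
    # return the first contour whose polygon approximation has four corners
    for contour in contours:
        approx = cv2.approxPolyDP(contour, 0.02 * cv2.arcLength(contour, True), True)
        if len(approx) == 4:
            return contour, approx.reshape(4, 2).astype(np.float32)
    return None, None

def warp(image, corners, side):
    # map the four corners onto a side x side square; a real version would
    # first sort the corners into a consistent order
    dst = np.float32([[0, 0], [side - 1, 0], [side - 1, side - 1], [0, side - 1]])
    matrix = cv2.getPerspectiveTransform(corners, dst)
    return cv2.warpPerspective(image, matrix, (side, side)), np.linalg.inv(matrix)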
Code Example #2
def large_image_demo(image):
    print(image.shape)
    cw = 256
    ch = 256
    h,w = image.shape[:2]
    gray = cv.cvtColor(image,cv.COLOR_BGR2GRAY)
    for row in range(0,h,ch):
        for col in range(0,w,cw):
            roi = gray[row:row+ch,col:col+cw]
            dst = cv.adaptiveThreshold(roi,255,cv.ADAPTIVE_THRESH_MEAN_C,cv.THRESH_BINARY,25,20)
            gray[row:row+ch,col:col+cw] = dst
            print(np.std(dst),np.mean(dst))
            """
            Global binarization combined with variance filtering gives an effect similar to adaptive thresholding
            dev = np.std(roi)
            if dev<15:
                gray[row:row+ch,col:col+cw] = 255
            else:
                ret,dst = cv.threshold(roi,0,255,cv.THRESH_BINARY|cv.THRESH_OTSU)
                gray[row:row+ch,col:col+cw] = dst
            """
    cv.imshow("result",gray)
Code Example #3
def ImageToBWEdgeDetect(img):
    # Load image:
    img_class = img.__class__.__name__
    if img_class == "str":
        img = Image.open(img)
    elif img is None:
        print("Invalid image format")
        return None
    # Adding white background to transparent images:
    working_size = GetImageResize(img, 512, max_ratio=1.5)
    img = img.resize(working_size)
    img = img.convert("RGBA")
    white_bg = Image.new("RGB", img.size, "WHITE")
    white_bg.paste(img, (0, 0), img)
    img = white_bg.convert("RGB")
    # Using OpenCV to find edges:
    # img = cv2.imread(img, 0)
    img = cv2.cvtColor(numpy.array(img), cv2.COLOR_RGB2GRAY)
    img = cv2.resize(img, working_size, interpolation=cv2.INTER_LINEAR)

    img = cv2.medianBlur(img, 5)
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 11, 2)
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)  # thresholded image is single-channel, so GRAY2RGB rather than BGR2RGB
    img = Image.fromarray(img)

    # img = img.convert("RGBA")
    # white_bg = Image.new("RGB", img.size, "WHITE")
    # white_bg.paste(img, (0, 0), img)
    # img = white_bg.convert("RGB")

    # img = img.quantize(colors = 2, method = Image.MAXCOVERAGE)
    # img = img.convert(mode = "1", dither = Image.NONE)
    #return img.quantize(colors = 2)
    return img


# img = ImageToBWEdgeDetect("RobloxImages/5918162200.jpg")
# img.save("grayscale_test.png")
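GetImageResize is not shown; judging by the call site it returns a (width, height) tuple whose longer side is capped at the given size and whose aspect ratio is clamped to max_ratio. A rough sketch under those assumptions:

def GetImageResize(img, max_size, max_ratio=1.5):
    # clamp the aspect ratio, then scale the longer side to max_size
    w, h = img.size
    ratio = min(max(w, h) / min(w, h), max_ratio)
    if w >= h:
        return (max_size, int(max_size / ratio))
    return (int(max_size / ratio), max_size)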
Code Example #4
def cartoonizer(img, num_down=2, num_bi=5):
    #Params
    #num_down = 2 #DOWNSAMPLE STEPS
    #num_bi = 5 # BILATERAL FILTERING STEPS
    img_c = img
    for ix in range(num_down):
        img_c = cv2.pyrDown(img_c)  # Pyramid Down: downsampling compounds on each pass
    # print(img_c.shape)

    for iy in range(num_bi):
        img_c = cv2.bilateralFilter(img_c, d=9, sigmaColor=9,
                                    sigmaSpace=7)  #Filtering
    # print(img_c.shape)

    #UPSAMPLING
    for ix in range(num_down):
        img_c = cv2.pyrUp(img_c)  # Pyramid Up: upsampling
    # print(img_c.shape)

    #BLUR and Threshold
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)  # GRAY SCALE
    img_blur = cv2.medianBlur(img_gray, 7)  #MEDIAN BLUR
    img_edge = cv2.adaptiveThreshold(img_blur,
                                     255,
                                     cv2.ADAPTIVE_THRESH_MEAN_C,
                                     cv2.THRESH_BINARY,
                                     blockSize=9,
                                     C=2)

    img_c = cv2.resize(img_c, (img.shape[1], img.shape[0]))  # match the edge mask; pyrDown/pyrUp can drift by a pixel
    #RGB CONVERSION + BITWISE &
    img_edge = cv2.cvtColor(img_edge, cv2.COLOR_GRAY2RGB)
    # print(img_c.shape)
    # print(img_edge.shape)
    img_cartoon = cv2.bitwise_and(img_c, img_edge)

    stack = np.hstack([img, img_cartoon])
    return stack
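A possible driver for the function above; photo.jpg is an illustrative path, and the output pairs the original with its cartoonized version:

import cv2

img = cv2.imread("photo.jpg")  # hypothetical input
if img is not None:
    cv2.imwrite("cartoon_side_by_side.png", cartoonizer(img))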
Code Example #5
def get_text(PDF_file):
    # Path of the pdf
    # Store all the pages of the PDF in a variable
    pages = convert_from_path(PDF_file, 500)

    # Counter to store images of each page of PDF to image
    image_counter = 1

    # Iterate through all the pages stored above
    t = ""

    for page in pages:
        # Declaring filename for each page of PDF as JPG
        # For each page, filename will be:
        # PDF page 1 -> page_1.jpg
        # PDF page 2 -> page_2.jpg
        # PDF page 3 -> page_3.jpg
        # ....
        # PDF page n -> page_n.jpg
        filename = "page_" + str(image_counter) + ".jpg"
        page.save(filename, 'JPEG')

        img = cv2.imread(filename)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Apply dilation and erosion to remove some noise
        kernel = np.ones((1, 1), np.uint8)
        img = cv2.dilate(img, kernel, iterations=1)
        img = cv2.erode(img, kernel, iterations=1)
        img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY, 31, 2)

        text = pytesseract.image_to_string(img, config=custom_oem_psm_config)
        t += text + "\n"

        os.remove(filename)
    return t
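The snippet depends on pdf2image's convert_from_path, pytesseract, and a module-level custom_oem_psm_config string that is not shown; a plausible setup (the config values are illustrative, not the author's):

import os
import cv2
import numpy as np
import pytesseract
from pdf2image import convert_from_path

custom_oem_psm_config = r'--oem 3 --psm 6'  # assumed Tesseract flags

print(get_text("sample.pdf"))  # hypothetical PDF path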
Code Example #6
def try_wrap():
    img = cv2.imread("datas/images/namecard_01.jpg")
    pts = np.float32([[400, 464], [934, 544], [96, 1174], [960, 1328]])

    img_wrap = wrapping(img, pts)
    img_gray, _, _ = preprocessing(img_wrap)

    img_gray2 = cv2.resize(img_gray,
                           None,
                           fx=0.5,
                           fy=0.5,
                           interpolation=cv2.INTER_CUBIC)
    img_th = cv2.adaptiveThreshold(img_gray2, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, -10)
    # ret,img_th = cv2.threshold(img_sharp,127,255,cv2.THRESH_BINARY)
    img_ocr, words = test_pytesseract2.get_OCR2(255 - img_th)

    img_gray = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2BGR)
    # img_blur = cv2.cvtColor(img_blur, cv2.COLOR_GRAY2BGR)
    # img_sharp = cv2.cvtColor(img_sharp, cv2.COLOR_GRAY2BGR)
    img_th = cv2.cvtColor(img_th, cv2.COLOR_GRAY2BGR)
    final1 = np.hstack((img_wrap, img_gray))
    final2 = np.hstack((img_th, img_ocr))

    final1 = cv2.resize(final1,
                        None,
                        fx=0.5,
                        fy=0.5,
                        interpolation=cv2.INTER_CUBIC)
    final = np.vstack((final1, final2))

    cv2.imshow('img1', final1)
    cv2.imshow('img2', final2)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
Code Example #7
def try_wrap2():
    img = cv2.imread("datas/images/licenseplate_04.jpg")
    img = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC)
    pts = np.float32([[200, 232], [467, 272], [48, 587], [480, 664]])

    img_wrap = wrapping(img, pts)
    img_gray, img_blur, img_sharp = preprocessing(img_wrap)
    img_th = cv2.adaptiveThreshold(img_sharp, 255,
                                   cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 4)
    img_ocr, words = test_pytesseract2.get_OCR2(img_th)

    img_gray = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2BGR)
    img_blur = cv2.cvtColor(img_blur, cv2.COLOR_GRAY2BGR)
    img_sharp = cv2.cvtColor(img_sharp, cv2.COLOR_GRAY2BGR)
    img_th = cv2.cvtColor(img_th, cv2.COLOR_GRAY2BGR)
    final1 = np.hstack((img_wrap, img_gray, img_blur))
    final2 = np.hstack((img_sharp, img_th, img_ocr))
    final = np.vstack((final1, final2))

    cv2.imshow('img1', img)
    # cv2.imshow('img2', final2)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
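wrapping and preprocessing are project helpers shared by try_wrap and try_wrap2 but not shown. A rough sketch under assumptions: pts lists corners as top-left, top-right, bottom-left, bottom-right (matching the coordinates used above), and preprocessing returns gray, blurred, and sharpened variants:

import cv2
import numpy as np

def wrapping(img, pts):
    # perspective-warp the quadrilateral pts onto an upright rectangle
    (tl, tr, bl, br) = pts
    w = int(max(np.linalg.norm(tr - tl), np.linalg.norm(br - bl)))
    h = int(max(np.linalg.norm(bl - tl), np.linalg.norm(br - tr)))
    dst = np.float32([[0, 0], [w - 1, 0], [0, h - 1], [w - 1, h - 1]])
    M = cv2.getPerspectiveTransform(pts, dst)
    return cv2.warpPerspective(img, M, (w, h))

def preprocessing(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    sharp = cv2.addWeighted(gray, 1.5, blur, -0.5, 0)  # simple unsharp mask
    return gray, blur, sharp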
Code Example #8
File: utils.py  Project: sergiuiacob1/iClicker
def get_binary_thresholded_image(cv2_image):
    img = convert_to_gray_image(cv2_image)
    img = cv2.medianBlur(img, 5)
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 11, 2)
    return img
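convert_to_gray_image comes from the same utils module and is not shown; presumably a thin wrapper along these lines:

import cv2

def convert_to_gray_image(cv2_image):
    # OpenCV loads images as BGR, so convert BGR to single-channel grayscale
    return cv2.cvtColor(cv2_image, cv2.COLOR_BGR2GRAY)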
Code Example #9
    def predict_folder(self, pather):

        dirs = os.listdir(pather)
        corrects = []
        wrongs = []
        every = []

        for folders in dirs:
            fold = os.fsdecode(folders)
            if fold.endswith((".DS_Store")):
                pass
            else:
                for filename in os.listdir(pather + "/" + fold):
                    if filename.endswith((".DS_Store")):
                        pass
                    else:
                        gray = cv2.imread(
                            pather + "/%s" % fold + "/%s" % filename, 0)
                        dilated_gray = cv2.dilate(gray,
                                                  np.ones((7, 7), np.uint8))
                        bg_gray = cv2.medianBlur(dilated_gray, 21)
                        diff_gray = 255 - cv2.absdiff(gray, bg_gray)
                        norm_gray = diff_gray.copy()
                        cv2.normalize(diff_gray,
                                      norm_gray,
                                      alpha=0,
                                      beta=255,
                                      norm_type=cv2.NORM_MINMAX,
                                      dtype=cv2.CV_8UC1)

                        _, thr_gray = cv2.threshold(norm_gray, 253, 0,
                                                    cv2.THRESH_TRUNC)
                        cv2.normalize(thr_gray,
                                      thr_gray,
                                      alpha=0,
                                      beta=255,
                                      norm_type=cv2.NORM_MINMAX,
                                      dtype=cv2.CV_8UC1)

                        gray1 = cv2.resize(255 - thr_gray, (112, 112),
                                           interpolation=cv2.INTER_AREA)

                        gray = cv2.adaptiveThreshold(
                            gray1, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                            cv2.THRESH_BINARY_INV, 5, 3)
                        (thresh, gray2) = cv2.threshold(
                            gray1, 128, 255,
                            cv2.THRESH_BINARY | cv2.THRESH_OTSU)

                        img = self.image_manip(gray)
                        img2 = self.image_manip(gray2)

                        img = image.img_to_array(img)
                        img2 = image.img_to_array(img2)

                        # reshape to 112x112 with a single grayscale channel
                        test_img = img.reshape((112, 112, 1))
                        test_img2 = img2.reshape((112, 112, 1))

                        test_img = test_img.astype('float32')
                        test_img2 = test_img2.astype('float32')

                        test_img /= 255
                        test_img2 /= 255

                        # add the batch axis -> (1, 112, 112, 1)
                        test_img = np.expand_dims(test_img, axis=0)
                        test_img2 = np.expand_dims(test_img2, axis=0)

                        prediction = self.model.predict(test_img)
                        prediction2 = self.model.predict(test_img2)

                        x1 = np.argmax(prediction)
                        x2 = np.argmax(prediction2)

                        c = prediction[0][x1]
                        v = prediction2[0][x2]

                        b = prediction[0][x2]
                        n = prediction2[0][x1]

                        k = (c + b) / 2
                        j = (v + n) / 2
                        if k > j:
                            classname = x1
                        else:
                            classname = x2

                        print(filename + " prediction: ", classname)

                        # This list is to keep count of how many images are being classified
                        every.append(filename)

                        # This is for benchmark tests during development and for demonstrating larger sets of images.
                        # It requires the images to be labeled to see how many predictions were right or wrong.
                        # THIS IS OPTIONAL: if you don't label your images you won't get a list of prediction accuracy,
                        # but the predictions will still be printed next to each image filename.
                        if filename.startswith(str(classname)):
                            corrects.append(filename)
                        else:
                            wrongs.append(filename)

        e = len(every)
        co = len(corrects)
        w = len(wrongs)
        print("all numbers")
        print(e)
        print("right guess")
        print(co)
        print("wrong guess")
        print(w)
        print(wrongs)
        print("Running: " + "ALnet-2.0")
def DetectColorGrids(Color_checker_image):
    img_gray = cv.cvtColor(Color_checker_image,cv.COLOR_BGR2GRAY)

    img_denoise = cv.fastNlMeansDenoising(img_gray, None, 10, 7, 21)  # second argument is dst and must be None or an array; h=10

    img_threshold = cv.adaptiveThreshold(img_denoise,255,cv.ADAPTIVE_THRESH_MEAN_C,cv.THRESH_BINARY,21,3)

    kernel = np.ones((3,3), np.uint8)
    img_eroded = cv.erode(img_threshold,kernel)

    small_grid_size_range = GetSmallGridSizeRange(Color_checker_image)

    contours, _hierarchy = cv.findContours(img_eroded, cv.RETR_TREE, cv.CHAIN_APPROX_NONE)

    color_grid_contours = FilterColorGrid(contours, small_grid_size_range)

    centroid_positions = []
    centroid_positions = FindContourCenter(color_grid_contours)

    centroids_no_overlap = []
    centroids_no_overlap = FilterOutOverlapPoint(color_grid_contours,centroid_positions)

    # small to big, based on the y axis
    centroids_no_overlap = sorted(centroids_no_overlap,key=lambda centroid: centroid[1])

    rad_top = math.atan((centroids_no_overlap[1][1] - centroids_no_overlap[0][1]) / 
                        (centroids_no_overlap[1][0] - centroids_no_overlap[0][0]))
    angle_top = rad_top * 180 / PI

    centroids_no_overlap_size = len(centroids_no_overlap)
    rad_bottom = math.atan((centroids_no_overlap[centroids_no_overlap_size - 1][1] - centroids_no_overlap[centroids_no_overlap_size - 2][1]) /
                            (centroids_no_overlap[centroids_no_overlap_size - 1][0] - centroids_no_overlap[centroids_no_overlap_size - 2][0]))
    angle_bottom = rad_bottom * 180 / PI

    angle_subtract = abs(angle_top - angle_bottom)

    if angle_subtract >= ANGLE_SUBTRACT_MAX_ENDURANCE:
        orientation_left_top = list(centroids_no_overlap[0])  # copy, so the loop below does not mutate the list entry
        orientation_right_bottom = [0.0, 0.0]

        for centroid in centroids_no_overlap:
            if centroid[0] < orientation_left_top[0]:
                orientation_left_top[0] = centroid[0]
            if centroid[1] < orientation_left_top[1]:
                orientation_left_top[1] = centroid[1]
            if centroid[0] > orientation_right_bottom[0]:
                orientation_right_bottom[0] = centroid[0]
            if centroid[1] > orientation_right_bottom[1]:
                orientation_right_bottom[1] = centroid[1]

        translation=[0.0,0.0]
        translation[0] = (orientation_right_bottom[0] - orientation_left_top[0]) / (TOTAL_COLUMNS - 1)
        translation[1] = (orientation_right_bottom[1] - orientation_left_top[1]) / (TOTAL_ROWS - 1)
        
        grids_position = np.zeros((TOTAL_ROWS, TOTAL_COLUMNS, 2), dtype=int)  # np.int was removed from NumPy

        row_count = 0
        col_count = 0
        while(row_count < TOTAL_ROWS):
            col_count = 0
            while(col_count < TOTAL_COLUMNS):
                grids_position[row_count][col_count][0] = orientation_left_top[0] + translation[0]*col_count
                grids_position[row_count][col_count][1] = orientation_left_top[1] + translation[1]*row_count
                col_count += 1
            row_count += 1
        
        return grids_position

    else:
        angle_avg = (angle_top + angle_bottom) / 2
        img_rotation = np.zeros(img_gray.shape, dtype=np.uint8)
        rows, cols = img_gray.shape
        center = [cols / 2, rows / 2]
        rotation_mat = cv.getRotationMatrix2D((center[0],center[1]),angle_avg,1.0)
        img_rotation = cv.warpAffine(img_eroded,rotation_mat,(cols,rows))

        contours, _hierarchy = cv.findContours(img_rotation, cv.RETR_TREE, cv.CHAIN_APPROX_NONE)

        color_grid_contours = FilterColorGrid(contours, small_grid_size_range)

        centroid_positions = []
        centroid_positions = FindContourCenter(color_grid_contours)

        orientation_left_top = list(centroid_positions[0])  # copy, so the loop below does not mutate the list entry
        orientation_right_bottom = [0.0, 0.0]

        # contour_arr_all = np.zeros(img_gray.shape, dtype=np.uint8)

        i=0
        for centroid in centroid_positions:
            if centroid[0] < orientation_left_top[0]:
                orientation_left_top[0] = centroid[0]
            if centroid[1] < orientation_left_top[1]:
                orientation_left_top[1] = centroid[1]
            if centroid[0] > orientation_right_bottom[0]:
                orientation_right_bottom[0] = centroid[0]
            if centroid[1] > orientation_right_bottom[1]:
                orientation_right_bottom[1] = centroid[1]
            i+=1

        translation=[0.0,0.0]
        translation[0] = (orientation_right_bottom[0] - orientation_left_top[0]) / (TOTAL_COLUMNS - 1)
        translation[1] = (orientation_right_bottom[1] - orientation_left_top[1]) / (TOTAL_ROWS - 1)

        grid_coordinate = np.zeros((TOTAL_ROWS, TOTAL_COLUMNS, 2), dtype=int)

        row_count = 0
        col_count = 0
        while(row_count < TOTAL_ROWS):
            col_count = 0
            while(col_count < TOTAL_COLUMNS):
                grid_coordinate[row_count][col_count][0] = orientation_left_top[0] + translation[0]*col_count
                grid_coordinate[row_count][col_count][1] = orientation_left_top[1] + translation[1]*row_count
                col_count += 1
            row_count +=1

        grids_position = np.zeros((TOTAL_ROWS, TOTAL_COLUMNS, 2), dtype=int)
        temp_coordinate = [0,0]
        row_count = 0
        col_count = 0
        while(row_count < TOTAL_ROWS):
            col_count = 0
            while(col_count < TOTAL_COLUMNS):
                temp_coordinate = GetRotationPoint((grid_coordinate[row_count][col_count][0],grid_coordinate[row_count][col_count][1]), cols, rows, -1 * angle_avg)
                grids_position[row_count][col_count][0] = temp_coordinate[0]
                grids_position[row_count][col_count][1] = temp_coordinate[1]
                col_count += 1
            row_count += 1

        return grids_position
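DetectColorGrids relies on module-level constants (PI, TOTAL_ROWS, TOTAL_COLUMNS, ANGLE_SUBTRACT_MAX_ENDURANCE) and helpers (GetSmallGridSizeRange, FilterColorGrid, FindContourCenter, FilterOutOverlapPoint, GetRotationPoint) that are not shown. As one example, GetRotationPoint presumably rotates a point about the image center by the given angle in degrees; a sketch under that assumption:

import math

def GetRotationPoint(point, cols, rows, angle_deg):
    # rotate point about the image center; the sign convention is assumed to
    # mirror cv.getRotationMatrix2D in image coordinates (y grows downward)
    cx, cy = cols / 2, rows / 2
    rad = math.radians(angle_deg)
    x, y = point[0] - cx, point[1] - cy
    xr = x * math.cos(rad) + y * math.sin(rad)
    yr = -x * math.sin(rad) + y * math.cos(rad)
    return (int(round(xr + cx)), int(round(yr + cy)))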
Code Example #11
from cv2 import cv2
''' 
    Apply thresholding of every type to real images.
    Two sample images were chosen for each type, illustrating how the results of the methods differ.
'''
"""For roads the THRESH_TOZERO method works best: the picture is monotone, and the result is crisper with a hand-tuned threshold"""
img = cv2.imread('Doroga1.png', cv2.IMREAD_GRAYSCALE)

treshold, img1 = cv2.threshold(img,
                               thresh=200,
                               maxval=255,
                               type=cv2.THRESH_TOZERO)
treshold, img2 = cv2.threshold(img, 200, 255, cv2.THRESH_OTSU)
img3 = cv2.adaptiveThreshold(img,
                             maxValue=255,
                             adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
                             thresholdType=cv2.THRESH_BINARY,
                             blockSize=11,
                             C=-4)

img_2 = cv2.imread('Doroga2.png', cv2.IMREAD_GRAYSCALE)

treshold, img4 = cv2.threshold(img_2, 200, 255, cv2.THRESH_TOZERO)
treshold, img5 = cv2.threshold(img_2, 200, 255, cv2.THRESH_OTSU)
img6 = cv2.adaptiveThreshold(img_2, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                             cv2.THRESH_BINARY, 3, -4)
"""Для сканов лучше подходит как адаптивный метод, так и TRESH_OTSU, т.к. скан документа не монотонный"""
# img = cv2.imread('Scan1.png', cv2.IMREAD_GRAYSCALE)

# treshold, img1 = cv2.threshold(img, 128, 255, cv2.THRESH_TOZERO)
# treshold, img2 = cv2.threshold(img, 128, 255, cv2.THRESH_OTSU)
# img3 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 31, 30)
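To compare the three road results at a glance, they can be stacked into one window (a usage note reusing the names above; all three share the source image's shape):

import numpy as np

cv2.imshow('tozero | otsu | adaptive', np.hstack((img1, img2, img3)))
cv2.waitKey(0)
cv2.destroyAllWindows()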
Code Example #12
# from googletrans import Translator

# translator = Translator()

# hindi_sent = "नमस्कार, धृति!"
# translation = translator.translate(hindi_sent,dest="en")
# translated_sentence = (f"{translation.text}")
# print(translated_sentence)

from cv2 import cv2
import numpy as np
import pytesseract
# from google.colab import cv2_imshow

img = cv2.imread('test2.png')
img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
reduced_noise = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,12)
# cv2.imshow("",reduced_noise)
# cv2.waitKey(0)
print("\nImage is loaded succesfully")

text=pytesseract.image_to_string(reduced_noise,lang='eng')
print("The text is :\n",text)
Code Example #13
File: preprocess.py  Project: AlbertFlorinus/mnistcnn
def decide(filename):
    # reading image to grayscale
    gray = cv2.imread(f"/content/mnistcnn/digits/{filename}", 0)

    # dilate img
    dilated_gray = cv2.dilate(gray, np.ones((7, 7), np.uint8))

    # median blur the dilated img to further suppress detail
    bg_gray = cv2.medianBlur(dilated_gray, 21)

    # calculate difference between gray(original img) and bg_gray.
    # identical pixels will be black(close to 0 difference), the digits details will be white(large difference).
    diff_gray = 255 - cv2.absdiff(gray, bg_gray)

    # normalize the image, so that we use the full dynamic range.
    norm_gray = diff_gray.copy()
    cv2.normalize(diff_gray,
                  norm_gray,
                  alpha=0,
                  beta=255,
                  norm_type=cv2.NORM_MINMAX,
                  dtype=cv2.CV_8UC1)

    # now the image is still gray in some areas, so we truncate that away and re-normalize the image.
    _, thr_gray = cv2.threshold(norm_gray, 253, 0, cv2.THRESH_TRUNC)
    cv2.normalize(thr_gray,
                  thr_gray,
                  alpha=0,
                  beta=255,
                  norm_type=cv2.NORM_MINMAX,
                  dtype=cv2.CV_8UC1)

    # inverting colors and resizing image to 112x112
    gray1 = cv2.resize(255 - thr_gray, (112, 112),
                       interpolation=cv2.INTER_AREA)

    # we have variables gray and gray2 to differentiate between two ways of thresholding.
    # Alnet-2.0 will make predictions on both of these, and combine the two to get our resulting classification.
    gray = cv2.adaptiveThreshold(gray1, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY_INV, 7, 3)
    (thresh, gray2) = cv2.threshold(gray1, 128, 255,
                                    cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # converting PIL image to numpy array

    img = image.img_to_array(gray)
    img2 = image.img_to_array(gray2)

    # reshaping to 112x112 with a single color channel (grayscale)
    test_img = img.reshape((112, 112, 1))
    test_img2 = img2.reshape((112, 112, 1))

    # change datatype from uint8 to float32
    test_img = test_img.astype('float32')
    test_img2 = test_img2.astype('float32')

    # normalizing pixels in the decimal range 0 to 1
    test_img /= 255
    test_img2 /= 255

    # add the batch axis at index 0 -> (1, 112, 112, 1), ready for prediction
    test_img = np.expand_dims(test_img, axis=0)
    test_img2 = np.expand_dims(test_img2, axis=0)

    # get network output (10 nodes)
    # prediction_list is prediction of gray
    # prediction2_list is prediction of gray2
    prediction_list = model.predict(test_img)
    prediction2_list = model.predict(test_img2)

    return prediction_list, prediction2_list, test_img, test_img2
Code Example #14
from cv2 import cv2 as cv
import numpy as np

img = cv.imread('cafe.jpg', 0)
img2 = cv.imread('olho.PNG', 0)

metodo = cv.THRESH_BINARY_INV # object of interest in white
#metodo = cv.THRESH_BINARY # object of interest in black

ret, imgBin = cv.threshold(img, 200, 255, metodo) # pixels on one side of the threshold are treated as the object of interest

img_gaussian = cv.medianBlur(img2, 7)

# a different threshold value for each region of the image
# gives better results when the contrast varies
mean = cv.adaptiveThreshold(img_gaussian, 255, cv.ADAPTIVE_THRESH_MEAN_C, metodo, 11, 5) 
gaussian = cv.adaptiveThreshold(img_gaussian, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, metodo, 11, 2)

cv.imshow('original', img)
cv.imshow('imgBin', imgBin)
cv.imshow('mean', mean)
cv.imshow('gaussian', gaussian)

cv.waitKey(0)
cv.destroyAllWindows()
Code Example #15
from cv2 import cv2 as cv

def rescaleFrame(frame, scale=0.75):
    width = int(frame.shape[1] * scale)
    height = int(frame.shape[0] * scale)
    dimensions = (width,height)
    return cv.resize(frame, dimensions, interpolation=cv.INTER_AREA)
#getting image
init = cv.imread('imagen/wppstalkerlarge.jpg')
img = rescaleFrame(init, 0.4)
cv.imshow('stalker', img)

gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)

#Simple Thresholding
threshold, thresh = cv.threshold(gray, 100, 255, cv.THRESH_BINARY)
cv.imshow('threshImaged', thresh)

threshold, threshInv = cv.threshold(gray, 100, 255, cv.THRESH_BINARY_INV)
cv.imshow('threshImagedInverse', threshInv)

#Adaptive Thresholding
adaptiveThresh = cv.adaptiveThreshold(gray, 255, cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY, 11, 5)
cv.imshow('adaptive thresholding', adaptiveThresh)


cv.waitKey(0)
Code Example #16
def videoCap():
    # pytesseract.pytesseract.tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract"
    cap = cv2.VideoCapture(
        'C:/Users/jeawa/Desktop/project/LPR_Project/project/video/test_car.mp4'
    )

    count = 0
    before_chars = ""

    while True:
        el = cv2.getTickCount()
        fps = cap.get(cv2.CAP_PROP_FPS)

        ret, img_ori = cap.read()
        if not ret:  # stop when the video runs out of frames
            break

        height, width, channel = img_ori.shape  # get height, width, and channel count

        gray = cv2.cvtColor(img_ori, cv2.COLOR_BGR2GRAY)

        # img_ori = cv2.imread('LPR_Project/project/image/3.jpg')  # load the image
        # gray = cv2.cvtColor(img_ori, cv2.COLOR_BGR2GRAY)

        img_blurred = cv2.GaussianBlur(gray, ksize=(3, 3), sigmaX=0)  # noise removal

        img_thresh = cv2.adaptiveThreshold(
            img_blurred,
            maxValue=255.0,
            adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            thresholdType=cv2.THRESH_BINARY_INV,
            blockSize=19,  # tried 12 (passed), 15, 20
            C=9)

        contours, _ = cv2.findContours(  # find the contours
            img_thresh,
            mode=cv2.RETR_LIST,
            method=cv2.CHAIN_APPROX_TC89_KCOS
        )

        temp_result = np.zeros((height, width, channel), dtype=np.uint8)

        cv2.drawContours(
            temp_result,  # canvas the size of the original image
            contours=contours,  # contour data
            contourIdx=-1,  # -1: draw all
            color=(255, 255, 255),
            thickness=1)

        contours_dict = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)  # rectangle enclosing the contour
            # cv2.rectangle(
            #     temp_result,
            #     pt1=(x, y),
            #     pt2=(x+w, y+h),
            #     color=(255, 255, 255),
            #     thickness=1
            # )
            contours_dict.append({
                'contour': contour,
                'x': x,
                'y': y,
                'w': w,
                'h': h,
                'cx': x + (w / 2),  # center coordinates
                'cy': y + (h / 2)
            })
        MIN_AREA, MAX_AREA = 100, 1000  # min/max area of the boundingRect
        MIN_WIDTH, MIN_HEIGHT = 2, 8  # min width and height of the boundingRect
        MIN_RATIO, MAX_RATIO = 0.3, 0.9  # allowed aspect (width/height) ratio of the boundingRect

        possible_contours = []  # rectangles that satisfy the conditions above
        cnt = 0
        for d in contours_dict:
            area = d['w'] * d['h']  # width * height = area
            ratio = d['w'] / d['h']  # width / height = aspect ratio

            if MIN_AREA < area < MAX_AREA and d['w'] > MIN_WIDTH and d[
                    'h'] > MIN_HEIGHT and MIN_RATIO < ratio < MAX_RATIO:
                d['idx'] = cnt  # record the index of each contour that passes
                cnt += 1
                possible_contours.append(d)  # update possible_contours

        # temp_result = np.zeros((height, width, channel), dtype=np.uint8)

        for d in possible_contours:
            # cv2.drawContours(temp_result, d['contour'], -1, (255, 255, 255))
            cv2.rectangle(temp_result,
                          pt1=(d['x'], d['y']),
                          pt2=(d['x'] + d['w'], d['y'] + d['h']),
                          color=(0, 0, 255),
                          thickness=1)

        MAX_DIAG_MULTIPLYER = 4  # centers must lie within this multiple of the diagonal
        MAX_ANGLE_DIFF = 12.0  # max angle (theta) between centers; tried 12, 0.7, 0.5, 0.6, 3
        MAX_AREA_DIFF = 0.3  # 0.5               # max area difference
        MAX_WIDTH_DIFF = 0.5  # max width difference
        MAX_HEIGHT_DIFF = 0.6  # max height difference
        MIN_N_MATCHED = 4  # 3                   # drop groups that match fewer boxes than this

        def find_chars(contour_list):
            matched_result_idx = []  # store matching idx values
            for d1 in contour_list:
                matched_contours_idx = []
                for d2 in contour_list:
                    if d1['idx'] == d2['idx']:  # skip comparing a contour with itself
                        continue

                    dx = abs(d1['cx'] - d2['cx'])
                    dy = abs(d1['cy'] - d2['cy'])

                    diagonal_length1 = np.sqrt(d1['w']**2 + d1['h']**2)

                    distance = np.linalg.norm(
                        np.array([d1['cx'], d1['cy']]) -
                        np.array([d2['cx'], d2['cy']]))  # distance between centers
                    if dx == 0:
                        angle_diff = 90  # dx == 0 means a vertical pair: treat the angle as 90 degrees (special case)
                    else:
                        angle_diff = np.degrees(np.arctan(dy / dx))  # compute theta
                    area_diff = abs(d1['w'] * d1['h'] - d2['w'] * d2['h']) / (
                        d1['w'] * d1['h'])  # area ratio
                    width_diff = abs(d1['w'] - d2['w']) / d1['w']  # width ratio
                    height_diff = abs(d1['h'] - d2['h']) / d1['h']  # height ratio
                    # the matching conditions
                    if distance < diagonal_length1 * MAX_DIAG_MULTIPLYER \
                            and angle_diff < MAX_ANGLE_DIFF and area_diff < MAX_AREA_DIFF \
                            and width_diff < MAX_WIDTH_DIFF and height_diff < MAX_HEIGHT_DIFF:
                        # only d2 has been appended inside the loop; d1 itself is added below
                        matched_contours_idx.append(d2['idx'])

                # append this contour
                matched_contours_idx.append(d1['idx'])

                if len(matched_contours_idx) < MIN_N_MATCHED:  # too few boxes to be a license plate
                    continue

                matched_result_idx.append(matched_contours_idx)  # final candidate group

                unmatched_contour_idx = []  # contours outside the final candidate group
                for d4 in contour_list:
                    if d4['idx'] not in matched_contours_idx:
                        unmatched_contour_idx.append(d4['idx'])

                unmatched_contour = np.take(possible_contours,
                                            unmatched_contour_idx)

                # recursive
                recursive_contour_list = find_chars(unmatched_contour)

                for idx in recursive_contour_list:
                    matched_result_idx.append(idx)  # collect groups found among the remaining contours

                break
            return matched_result_idx

        result_idx = find_chars(possible_contours)
        matched_result = []
        for idx_list in result_idx:
            matched_result.append(np.take(possible_contours, idx_list))
        # visualize possible contours
        temp_result = np.zeros((height, width, channel), dtype=np.uint8)
        for r in matched_result:
            for d in r:
                cv2.rectangle(temp_result,
                              pt1=(d['x'], d['y']),
                              pt2=(d['x'] + d['w'], d['y'] + d['h']),
                              color=(0, 0, 255),
                              thickness=1)

        PLATE_WIDTH_PADDING = 1.2  # 1.3
        PLATE_HEIGHT_PADDING = 1.5  # 1.5
        MIN_PLATE_RATIO = 3
        MAX_PLATE_RATIO = 7  #10

        plate_imgs = []
        plate_infos = []

        for i, matched_chars in enumerate(matched_result):
            sorted_chars = sorted(matched_chars,
                                  key=lambda x: x['cx'])  # sort left-to-right by x

            plate_cx = (sorted_chars[0]['cx'] +
                        sorted_chars[-1]['cx']) / 2  # center coordinates of the plate
            plate_cy = (sorted_chars[0]['cy'] + sorted_chars[-1]['cy']) / 2

            plate_width = (sorted_chars[-1]['x'] + sorted_chars[-1]['w'] -
                           sorted_chars[0]['x']) * PLATE_WIDTH_PADDING  # width

            # sum_height = 0
            # for d in sorted_chars:
            #     sum_height += d['h']

            # plate_height = int(sum_height / len(sorted_chars)
            #                    * PLATE_HEIGHT_PADDING)  # height
            plate_height = (sorted_chars[-1]['y'] - sorted_chars[0]['y'] +
                            sorted_chars[-1]['h']) * PLATE_HEIGHT_PADDING
            #
            triangle_height = sorted_chars[-1]['cy'] - sorted_chars[0]['cy']
            triangle_hypotenus = np.linalg.norm(
                np.array([sorted_chars[0]['cx'], sorted_chars[0]['cy']]) -
                np.array([sorted_chars[-1]['cx'], sorted_chars[-1]['cy']]))

            angle = np.degrees(np.arcsin(triangle_height / triangle_hypotenus))

            rotation_matrix = cv2.getRotationMatrix2D(center=(plate_cx,
                                                              plate_cy),
                                                      angle=angle,
                                                      scale=1.0)

            # rotate the frame so the plate is horizontal
            img_rotated = cv2.warpAffine(img_thresh,
                                         M=rotation_matrix,
                                         dsize=(width, height))

            img_cropped = cv2.getRectSubPix(img_rotated,
                                            patchSize=(int(plate_width),
                                                       int(plate_height)),
                                            center=(int(plate_cx),
                                                    int(plate_cy)))

            plate_ratio = img_cropped.shape[1] / img_cropped.shape[0]
            if plate_ratio < MIN_PLATE_RATIO or plate_ratio > MAX_PLATE_RATIO:
                continue

            plate_imgs.append(img_cropped)
            plate_infos.append({
                'x': int(plate_cx - plate_width / 2),
                'y': int(plate_cy - plate_height / 2),
                'w': int(plate_width),
                'h': int(plate_height)
            })
            for i, plate_img in enumerate(plate_imgs):

                plate_img = cv2.resize(plate_img, dsize=(0, 0), fx=1.6, fy=1.6)
                _, plate_img = cv2.threshold(plate_img,
                                             thresh=0.0,
                                             maxval=255.0,
                                             type=cv2.THRESH_BINARY
                                             | cv2.THRESH_OTSU)
                # plt.imshow(plate_img, cmap='gray')
                # plt.show()
                # find contours again (same as above)
                contours, _ = cv2.findContours(plate_img,
                                               mode=cv2.RETR_LIST,
                                               method=cv2.CHAIN_APPROX_SIMPLE)

                plate_min_x, plate_min_y = plate_img.shape[1], plate_img.shape[
                    0]
                plate_max_x, plate_max_y = 0, 0

                for contour in contours:
                    x, y, w, h = cv2.boundingRect(contour)

                    area = w * h
                    ratio = w / h

                    if area > MIN_AREA \
                            and w > MIN_WIDTH and h > MIN_HEIGHT \
                            and MIN_RATIO < ratio < MAX_RATIO:
                        if x < plate_min_x:
                            plate_min_x = x
                        if y < plate_min_y:
                            plate_min_y = y
                        if x + w > plate_max_x:
                            plate_max_x = x + w
                        if y + h > plate_max_y:
                            plate_max_y = y + h

            img_result = plate_img[plate_min_y:plate_max_y,
                                   plate_min_x:plate_max_x]
            img_result = cv2.GaussianBlur(img_result, ksize=(3, 3), sigmaX=0)
            _, img_result = cv2.threshold(img_result,
                                          thresh=0.0,
                                          maxval=255.0,
                                          type=cv2.THRESH_BINARY
                                          | cv2.THRESH_OTSU)
            img_result = cv2.copyMakeBorder(img_result,
                                            top=10,
                                            bottom=10,
                                            left=20,
                                            right=10,
                                            borderType=cv2.BORDER_CONSTANT,
                                            value=(0, 0, 0))
            chars = pytesseract.image_to_string(img_result,
                                                lang='kor',
                                                config='--psm 7 --oem 0')

            result_chars = ''

            has_digit = False
            # keep only digits and the Hangul syllables used on Korean license plates
            plate_hangul = '가나다라마거너더러머버서어저고노도로모보소오조구누두루무부수우주아바사자배하허호'
            for c in chars:
                if c in plate_hangul or c.isdigit():
                    if c.isdigit():
                        has_digit = True
                    result_chars += c
            # print(result_chars)
            # print(len(result_chars))

            if result_chars == "":
                pass
            else:
                if before_chars == result_chars and 6 < len(result_chars) < 9:
                    count += 1
                else:
                    before_chars = result_chars
                    count = 0

            if count == 1:
                print(result_chars)
                count = 0
                before_chars = ""

        # print(fps, ": frames per second of this video")
        print(fps)
        a = np.hstack((img_ori, temp_result))
        cv2.imshow("Go", a)
        if cv2.waitKey(42) == ord('q'):
            break
        e2 = cv2.getTickCount()
        time = (e2 - el) / cv2.getTickFrequency()
        print(time)
        # print("1프레임 : 1초당 처리속도는 " + time + "입니다.")
    cap.release()
    cv2.destroyAllWindows()
Code Example #17
from cv2 import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('./images/sudoku.png', 0)
img = cv2.medianBlur(img, 5)
ret, th1 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
th2 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                            cv2.THRESH_BINARY, 11, 2)
th3 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                            cv2.THRESH_BINARY, 11, 2)
titles = [
    'Original Image', 'Global Thresholding (v = 127)',
    'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding'
]
images = [img, th1, th2, th3]
for i in range(4):
    plt.subplot(2, 2, i + 1), plt.imshow(images[i], 'gray')
    plt.title(titles[i])
    plt.xticks([]), plt.yticks([])
plt.show()
Code Example #18
File: ocr.py  Project: Fethbita/eMRTD_face_access
def capture_mrz(window: sg.Window,
                camera_id: int) -> Tuple[List[str], Image.Image]:
    """
    Capture the MRZ by using OCR and the camera footage.

    :returns: MRZ lines in a list
    """

    cap = cv2.VideoCapture(camera_id)

    tess_api = PyTessBaseAPI(init=False, psm=PSM.SINGLE_BLOCK_VERT_TEXT)
    tess_api.InitFull(
        # https://github.com/DoubangoTelecom/ultimateMRZ-SDK/tree/master/assets/models
        path="text_detection",
        lang="mrz",
        variables={
            "load_system_dawg": "false",
            "load_freq_dawg": "false",
            "tessedit_char_whitelist": "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<",
        },
    )
    # mrz_list: List[List[str]] = []

    pool = ThreadPool(processes=1)
    ocr_running = False
    while True:
        _, frame = cap.read()

        mrz = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        mrz = cv2.adaptiveThreshold(mrz, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                    cv2.THRESH_BINARY, 21, 10)
        # mrz = cv2.adaptiveThreshold(mrz, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,3,2)
        # mrz = cv2.GaussianBlur(mrz, (5,5), 0)
        # _, mrz = cv2.threshold(mrz, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # mrz = cv2.GaussianBlur(mrz, (5,5), 0)
        # mrz = cv2.medianBlur(mrz, 3)
        frame_shown = copy.deepcopy(mrz)
        width = 320
        height = int(frame_shown.shape[0] * (320 / frame_shown.shape[1]))
        frame_shown = cv2.resize(frame_shown, (width, height))

        alpha = 0.8
        frame_overlay = add_mrz_overlay(copy.deepcopy(frame_shown),
                                        "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<", 3,
                                        0.9, False)
        frame_overlay = add_mrz_overlay(
            frame_overlay, "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<", 2,
            0.9, True)
        cv2.addWeighted(frame_shown, alpha, frame_overlay, 1 - alpha, 0,
                        frame_shown)

        imgbytes = cv2.imencode(".png", frame_shown)[1].tobytes()
        window.write_event_value("-SHOW MRZ-", [imgbytes])

        mrz = Image.fromarray(mrz)
        if not ocr_running:
            checked_frame = Image.fromarray(frame[:, :, ::-1])
            tess_api.SetImage(mrz)
            async_result = pool.apply_async(tess_api.GetUTF8Text)
            ocr_running = True

        if async_result.ready():
            ocr_running = False
            mrz_text = async_result.get()
            result = parse_mrz_ocr(mrz_text)

            if result is not None:
                break

            # if result and len(mrz_list) < 3:
            #     mrz_list.append(result)
            # elif not result:
            #     mrz_list = []
            # else:
            #     if all(x == mrz_list[0] for x in mrz_list):
            #         break

    # When everything done, release the capture
    cap.release()
    # cv2.destroyAllWindows()
    tess_api.End()

    # return mrz_list[0]
    window.write_event_value("-HIDE MRZ-", "")

    return (result, checked_frame)
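parse_mrz_ocr and add_mrz_overlay are project helpers that are not shown. parse_mrz_ocr plausibly splits the OCR output into candidate MRZ lines and returns None until a well-formed read appears; a rough sketch (the length checks follow the TD1/TD3 MRZ formats, the validation details are guesses):

def parse_mrz_ocr(mrz_text):
    lines = [l.strip() for l in mrz_text.splitlines() if '<' in l]
    # TD1 documents carry three 30-character lines, TD3 two 44-character lines
    if len(lines) == 3 and all(len(l) == 30 for l in lines):
        return lines
    if len(lines) == 2 and all(len(l) == 44 for l in lines):
        return lines
    return None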
Code Example #19
from cv2 import cv2
import numpy as np

img = cv2.imread('sudoku.jpg')

# NOTE: If not converted to grayscale the following error is coming:
# error: (-215:Assertion failed) src.type() == CV_8UC1 in function 'cv::adaptiveThreshold'

# The problem is that you are trying to use adaptive thresholding to an image that is not in greyscale.
# And the function only works with a greyscale images.
# So you have to convert your image to a greyscale format as it is described in documentation.
# They read the image in a greyscale format with: img = cv2.imread('dave.jpg',0). You can also convert it
# to greyscale

img_grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# SYNTAX: adaptiveThreshold(src, maxValue, adaptiveMethod, thresholdType, blockSize, C)
# NOTE: More the value of 'C' the more white the image becomes
adap_img = cv2.adaptiveThreshold(img_grey, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 11, 1)  # MEAN

adap_img1 = cv2.adaptiveThreshold(img_grey, 255,
                                  cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY, 11, 2)  # GAUSSIAN

cv2.imshow('Original Image', img)
cv2.imshow('Adaptive_Mean Image', adap_img)
cv2.imshow('Adaptive_Gaussian Image', adap_img1)

if cv2.waitKey(0) == 27:
    cv2.destroyAllWindows()
Code Example #20
    def predict_chosen(self):

        print("")
        # selecting image to predict
        filename = askopenfilename()

        # reading image to grayscale
        gray = cv2.imread(filename, 0)

        # dilate img
        dilated_gray = cv2.dilate(gray, np.ones((7, 7), np.uint8))

        # median blur the dilated img to further suppress detail
        bg_gray = cv2.medianBlur(dilated_gray, 21)

        # calculate difference between gray(original img) and bg_gray.
        # identical pixels will be black(close to 0 difference), the digits details will be white(large difference).
        diff_gray = 255 - cv2.absdiff(gray, bg_gray)

        # normalize the image, so that we use the full dynamic range.
        norm_gray = diff_gray.copy()
        cv2.normalize(diff_gray,
                      norm_gray,
                      alpha=0,
                      beta=255,
                      norm_type=cv2.NORM_MINMAX,
                      dtype=cv2.CV_8UC1)

        # now the image is still gray in some areas, so we truncate that away and re-normalize the image.
        _, thr_gray = cv2.threshold(norm_gray, 253, 0, cv2.THRESH_TRUNC)
        cv2.normalize(thr_gray,
                      thr_gray,
                      alpha=0,
                      beta=255,
                      norm_type=cv2.NORM_MINMAX,
                      dtype=cv2.CV_8UC1)

        # inverting colors and resizing image to 112x112
        gray1 = cv2.resize(255 - thr_gray, (112, 112),
                           interpolation=cv2.INTER_AREA)

        # we have variables gray and gray2 to differentiate between two ways of thresholding.
        # Alnet-2.0 will make predictions on both of these, and combine the two to get our resulting classification.
        gray = cv2.adaptiveThreshold(gray1, 255,
                                     cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                     cv2.THRESH_BINARY_INV, 5, 3)
        (thresh, gray2) = cv2.threshold(gray1, 128, 255,
                                        cv2.THRESH_BINARY | cv2.THRESH_OTSU)

        img = self.image_manip(gray)
        img2 = self.image_manip(gray2)

        # converting PIL image to numpy array
        img = image.img_to_array(img)
        img2 = image.img_to_array(img2)

        # reshaping to 112x112 with a single color channel (grayscale)
        test_img = img.reshape((112, 112, 1))
        test_img2 = img2.reshape((112, 112, 1))

        # change datatype from uint8 to float32
        test_img = test_img.astype('float32')
        test_img2 = test_img2.astype('float32')

        # normalizing pixels in the decimal range 0 to 1
        test_img /= 255
        test_img2 /= 255

        # add the batch axis at index 0 -> (1, 112, 112, 1), ready for prediction
        test_img = np.expand_dims(test_img, axis=0)
        test_img2 = np.expand_dims(test_img2, axis=0)

        # get network output (10 nodes)
        # prediction_list is prediction of gray
        # prediction2_list is prediction of gray2
        prediction_list = self.model.predict(test_img)
        prediction2_list = self.model.predict(test_img2)

        # x1 and x2 are indexes for elements (what digit is guessed) of highest value for images gray and gray2
        x1 = np.argmax(prediction_list)
        x2 = np.argmax(prediction2_list)

        # c is the highest value in prediction_list
        # v is the highest value in prediction2_list
        c = prediction_list[0][x1]
        v = prediction2_list[0][x2]

        # b is the value of prediction_list at index of prediction2_list highest value
        # n is the value of prediction2_list at index of predition_list highest value
        b = prediction_list[0][x2]
        n = prediction2_list[0][x1]

        # this helps us decide what prediction is most likely to be correct, by comparing the values below.
        k = (c + b) / 2
        j = (v + n) / 2
        if k > j:
            classname = x1
        else:
            classname = x2

        # reshaping again for showing image to user
        test_img = test_img.reshape((112, 112))
        test_img2 = test_img2.reshape((112, 112))

        # the figure below shows gray and gray2, the values of k and j, and the estimated classname for the chosen image
        fig = plt.figure(figsize=(8, 8))
        columns = 2
        rows = 1
        plt.gray()
        fig.add_subplot(rows, columns,
                        1).set_title("adaptive_Threshold\n k = {}".format(k))
        plt.imshow(test_img)
        fig.add_subplot(rows, columns,
                        2).set_title("Threshold\n j = {}".format(j))
        plt.imshow(test_img2)
        if k > j:
            fig.suptitle("Image shows a {}\n k > j".format(classname))
        else:
            fig.suptitle("Image shows a {}\n j > k".format(classname))

        plt.show()

        print("\nadaptive_Threshold prediction:\n", prediction_list, "\n")
        print("Threshold prediction:\n", prediction2_list)
        print("\nRunning: " + "Alnet-2.0.")
Code Example #21
    # # cap.set(cv2.CAP_PROP_BUFFERSIZE, 3);
    # cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    ret, original_frame = cap.read()
    out.write(original_frame)

    #we will use this later

    gray = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
    # Blurring the unwanted stuff
    blur = cv2.GaussianBlur(gray, (9, 9), 0)

    # Adaptive threshold to obtain a binary image from the frame,
    # followed by a probabilistic Hough line transform to highlight straight-line proposals
    # https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_thresholding/py_thresholding.html

    adaptive = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                     cv2.THRESH_BINARY, 11, 2)
    processed = cv2.bitwise_not(adaptive, adaptive)
    # processed_not_dilated = processed.cop

    # Dilate the image to increase the size of the grid lines.
    kernel = np.array([[0., 1., 0.], [1., 1., 1.], [0., 1., 0.]],
                      dtype=np.uint8)
    processed = cv2.dilate(processed, kernel)

    # Find the proposed sudoku contour - contours are all possible boundary points; we will build the grid from them
    contours, hier = cv2.findContours(processed.copy(), cv2.RETR_EXTERNAL,
                                      cv2.CHAIN_APPROX_SIMPLE)
    external_contours = cv2.drawContours(processed.copy(), contours, -1,
                                         (255, 0, 0), 2)

    current_time = (time.time() - starttime)
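The snippet above starts mid-script; cap, out, and starttime are evidently set up earlier, presumably along these lines (device index, codec, and file name are placeholders):

import time
import cv2
import numpy as np

cap = cv2.VideoCapture(0)  # hypothetical camera index
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('capture.avi', fourcc, 20.0, (640, 480))
starttime = time.time()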
Code Example #22
    def prepareData(self,cvl=False,iam=False,cvl_iam=False):
        input_len = []
        text_list = []
        original_text = []
        label_len = []
        max_len = 0
        image = []

        if iam:
            
            print('LOADING IAM DATASET')
            image_name_path,text,len_text = self.read_data()

        elif cvl:

            print('LOADING CVL DATASET')
            path = r'D:\cvl-database-1-1\testset\words'
            image_name_path = [ ]
            text = [ ]
            len_text = [ ]
            
            for root,dirs,files in os.walk(path):
                for image_name in files:
                    image_name_path.append(os.path.join(root,image_name))
                    val = image_name.split('-')[-1].split('.')[0]  
                    text.append(val.split('.')[0])
                    len_text.append(len(val))
        else:
            ## IN DEVELOPMENT - DO NOT CALL THIS BRANCH (path is undefined here)
            
            image_name_path,text,len_text = self.read_data()
            
            for root,dirs,files in os.walk(path):
                for image_name in files:
                    image_name_path.append(os.path.join(root,image_name))
                    val = image_name.split('-')[-1].split('.')[0] 
                    text.append(val.split('.')[0])
                    len_text.append(len(val))
    
            

        for image_name,text_word,text_len in tqdm(zip(image_name_path,text,len_text)):
            
            load_image = cv2.imread(image_name)
            try:
                img = cv2.cvtColor(load_image,cv2.COLOR_BGR2GRAY)
                img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,blockSize=91,C=11)
            except:
                continue

            # Dilate and erode the image to make it look clearer
            kernal = np.ones((5,5),np.uint8)
            #img = cv2.erode(img,kernal,1)
            #img = cv2.dilate(img,kernal,1)

            # Adjust the image size so every image has shape (32, 128) (h x w)
            height,width = img.shape
                
            if ((height > 32) or (width > 128)):
                img = cv2.resize(img,(128,32),interpolation = cv2.INTER_AREA)
            else:
                if height < 32:
                    add_ones = np.ones((32-height,width)) * 255
                    try: 
                        img = np.concatenate((img,add_ones))
                    except:
                        continue

                if width < 128:
                    add_ones = np.ones((height,128-width)) * 255 
                    try:
                        img = np.concatenate((img,add_ones),axis=1)
                    except:
                        continue
            
            img = np.expand_dims(img,axis=2)
            
            # Encode text
            encode_text = self.__encode_string_to_numbers(text_word)
            
            # Len of text
            if text_len > max_len:
                max_len = text_len
            
            
            image.append(img)
            text_list.append(encode_text)
            original_text.append(text_word)
            input_len.append(len(encode_text))
            label_len.append(len(text_word))
            
        return image,text_list,input_len,label_len,original_text,max_len
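__encode_string_to_numbers is another method of the same class; for CTC-style text recognition it presumably maps each character to an integer index over the model's character set. A hypothetical version (the charset is a guess, shown standalone for brevity):

CHARSET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"  # assumed

def __encode_string_to_numbers(self, text):
    # characters outside the assumed charset are dropped
    return [CHARSET.index(ch) for ch in text if ch in CHARSET]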
Code Example #23
File: take.py  Project: priyavaddineni/Cartoonify
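The capture loop below starts mid-script; cam and img_counter are evidently initialized earlier, presumably along these lines (the device index is a guess):

import cv2

cam = cv2.VideoCapture(0)  # hypothetical camera index
img_counter = 0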
while True:
    ret, frame = cam.read()
    if not ret:
        break
    cv2.imshow("test", frame)

    key = cv2.waitKey(1) & 0xFF  # poll once per frame; two waitKey calls would see different key events
    if key == 27:  # ESC
        break
    elif key == 32:  # SPACE

        img_name = "opencv_frame_{}.png".format(img_counter)
        cv2.imwrite(img_name, frame)

        img_counter += 1

cam.release()

cv2.destroyAllWindows()
img = cv2.imread('//project//project//opencv_frame_0.png')

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.medianBlur(gray, 5)
edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                              cv2.THRESH_BINARY, 21, 8)

color = cv2.medianBlur(img, 19)
cartoon = cv2.bitwise_and(color, color, mask=edges)

cv2.imwrite("//project//project//media//temp.png", cartoon)
print("/media/temp.png")
コード例 #24
0
import numpy as np
from cv2 import cv2
import pytesseract
from PIL import Image

# Load image, grayscale, adaptive threshold
image = cv2.imread('captchas/10.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 11, 3)

# Morph open
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

# Remove noise by filtering using contour area
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 values in OpenCV 4.x and 3 values in 3.x
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 10:
        cv2.drawContours(opening, [c], -1, (0, 0, 0), -1)

# Invert image for result
result = 255 - opening

cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('result', result)
cv2.waitKey(0)  # keep the windows open until a key is pressed

cv2.imwrite("result2.png", result)
コード例 #25
0
    motoren = simulator

    # hammer the CPU:
    while True:
        # read a frame
        dieser_frame = kamera.get_frame()

        ########################################
        # different thresholding variants      #
        ########################################
        # convert a copy of dieser_frame to grayscale
        frame_graustufen = cv2.cvtColor(dieser_frame, cv2.COLOR_BGR2GRAY)
        _, frame_schwarz_weiss1 = cv2.threshold(frame_graustufen, 110, 255,
                                                cv2.THRESH_BINARY_INV)
        frame_schwarz_weiss2 = cv2.adaptiveThreshold(
            frame_graustufen, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY_INV, 11, 9)
        frame_schwarz_weiss3 = cv2.adaptiveThreshold(
            frame_graustufen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 6)

        # show the camera image
        cv2.imshow("Original", dieser_frame)
        cv2.imshow("Global Threshold", frame_schwarz_weiss1)
        cv2.imshow("Adaptive Mean", frame_schwarz_weiss2)
        cv2.imshow("Adaptive Gaussian", frame_schwarz_weiss3)

        # drive the motors
        motoren.set_speed(30, 30)

        # pause with p, move with w,a,s,d, rotate with q,e, quit with k
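The excerpt stops right before the key handling its last comment announces; a minimal sketch of what that block might look like (the set_speed arguments are assumptions):

        key = cv2.waitKey(1) & 0xFF    # waitKey also lets the imshow windows refresh
        if key == ord('k'):            # quit
            break
        elif key == ord('p'):          # pause: stop the motors
            motoren.set_speed(0, 0)
        elif key == ord('w'):          # move forward
            motoren.set_speed(30, 30)
        # ... a, s, d, q and e would be handled analogously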
コード例 #26
0
        As = [0,30,60,90,120,150]

        filtered_img = np.zeros(image_nor.shape, np.float32)
        for A in As:
            #cv.getGaborKernel(ksize, sigma, theta, lambd, gamma[, psi[, ktype]])
            g_kernel = cv2.getGaborKernel((27, 27), 3.4, np.pi*A/180, 8.1, 0.9, 0, ktype=cv2.CV_32F)
            _filtered_img = cv2.filter2D(image_nor, cv2.CV_8UC3, g_kernel)
            filtered_img += _filtered_img
        
        # normalize the summed filter response to the 0..255 range
        filtered_img = filtered_img / filtered_img.max() * 255
        filtered_img = filtered_img.astype(np.uint8)

        # turn the filter response into a two-dimensional, binarized code
        filtered_img = cv2.cvtColor(filtered_img, cv2.COLOR_BGR2GRAY)
        filtered_img = cv2.adaptiveThreshold(filtered_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                             cv2.THRESH_BINARY, 19, 1)  

        # either save the code to the database or search with it
        if file in Baza:
            cv2.imwrite("Baza/kod" + str(files.index(file)) + ".bmp", filtered_img)
            print(file)
        else:
            Search(filtered_img, file)

    else:
        NotFound.append(file)
        #print(file + " ----> error: no iris detected")
        continue

cv2.waitKey(0)
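To sanity-check the filter bank, each Gabor kernel can be rendered as an image before it is applied; a minimal sketch using the same parameters as above (the window naming and scaling are ours):

for A in [0, 30, 60, 90, 120, 150]:
    g = cv2.getGaborKernel((27, 27), 3.4, np.pi * A / 180, 8.1, 0.9, 0, ktype=cv2.CV_32F)
    vis = cv2.normalize(g, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    cv2.imshow("gabor %d deg" % A, cv2.resize(vis, (270, 270), interpolation=cv2.INTER_NEAREST))
cv2.waitKey(0)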
コード例 #27
0
ファイル: scan.py プロジェクト: xuansonle/OpenCV-Application
    # Keep the detected size...
    # width_ = w
    # height_ = h
    # ...or force A4 format at 300 DPI
    width_ = 1240
    height_ = 1754

    #New coordinates
    new_rect = np.array(
        [[0, 0], [width_ - 1, 0], [width_ - 1, height_ - 1], [0, height_ - 1]],
        dtype="float32")

    M = cv2.getPerspectiveTransform(rect, new_rect)
    output_gray = cv2.warpPerspective(gray, M, (width_, height_))

    output = cv2.adaptiveThreshold(output_gray, 255,
                                   cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY, 5, 2)

except Exception as e:

    print(str(e))
    sys.exit(0)

print("Print")
# MATPLOTLIB: show the original, the warped grayscale, and the thresholded output
imgs = [img, output_gray, output]
for i, im in enumerate(imgs, start=1):
    plt.subplot(1, len(imgs), i)
    if im.ndim == 3:
        # OpenCV is BGR, matplotlib expects RGB
        plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    else:
        plt.imshow(im, cmap='gray')
plt.show()
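cv2.getPerspectiveTransform only gives a sensible warp if `rect` lists its corners in the same order as `new_rect` (top-left, top-right, bottom-right, bottom-left). The excerpt does not show how `rect` was built; a common ordering helper, as a sketch:

def order_points(pts):
    # sort four arbitrary (x, y) points into tl, tr, br, bl order
    pts = np.asarray(pts, dtype="float32").reshape(4, 2)
    s = pts.sum(axis=1)                # tl has the smallest x+y, br the largest
    d = np.diff(pts, axis=1).ravel()   # tr has the smallest y-x, bl the largest
    return np.array([pts[np.argmin(s)], pts[np.argmin(d)],
                     pts[np.argmax(s)], pts[np.argmax(d)]], dtype="float32")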
コード例 #28
0
    im2 = imgWarpColored.copy()
    im2 = unsharp_mask(im2)
    cv2.imshow("b", im2)

    im3 = im1.copy()
    im3 = unsharp_mask(im3)
    cv2.imshow("a+b", im3)

    im4 = imgWarpColored.copy()
    blur = cv2.GaussianBlur(im4, (5, 5), 3)
    # unsharp mask: 1.5 * original - 0.5 * blurred
    # (the original blended the blurred image with itself, which is a no-op)
    im4 = cv2.addWeighted(im4, 1.5, blur, -0.5, 0)
    cv2.imshow("a+b+c", im4)

    # APPLY ADAPTIVE THRESHOLD (flags spelled out instead of the magic 1, 1)
    imgWarpGray = cv2.cvtColor(imgWarpColored, cv2.COLOR_BGR2GRAY)
    imgAdaptiveThre = cv2.adaptiveThreshold(imgWarpGray, 255,
                                            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                            cv2.THRESH_BINARY_INV, 7, 2)
    imgAdaptiveThre = cv2.bitwise_not(imgAdaptiveThre)
    imgAdaptiveThre = cv2.medianBlur(imgAdaptiveThre, 3)
    cv2.imshow("Warped image after adaptive threshold 1", imgAdaptiveThre)

    imgWarpGray1 = cv2.cvtColor(im3, cv2.COLOR_BGR2GRAY)
    imgAdaptiveThre1 = cv2.adaptiveThreshold(imgWarpGray1, 255,
                                             cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                             cv2.THRESH_BINARY_INV, 7, 2)
    imgAdaptiveThre1 = cv2.bitwise_not(imgAdaptiveThre1)
    imgAdaptiveThre1 = cv2.medianBlur(imgAdaptiveThre1, 3)
    cv2.imshow("Warped image after adaptive threshold 2", imgAdaptiveThre1)

    if cv2.waitKey(0) & 0xFF == ord('s'):
        break
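The `unsharp_mask` helper called above is not part of the excerpt; a minimal sketch of the usual implementation (kernel size, sigma and amount are assumptions):

def unsharp_mask(image, ksize=(5, 5), sigma=3, amount=0.5):
    # sharpened = (1 + amount) * image - amount * blurred
    blurred = cv2.GaussianBlur(image, ksize, sigma)
    return cv2.addWeighted(image, 1 + amount, blurred, -amount, 0)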
コード例 #29
0
import numpy as np
from cv2 import cv2

img = cv2.imread('detect_blob.png', 1)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # imread returns BGR, not RGB

# Gaussian adaptive threshold with a binary output type
thresh_img = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 115, 1)
cv2.imshow("Adaptive Thresh Image",thresh_img)

#Contours
contours, hierarchy = cv2.findContours(thresh_img,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) 

thickness = 2
color = (255,0,255)

canvas = np.zeros([img.shape[0], img.shape[1], 3], 'uint8')  # 'object' shadows a builtin, so use 'canvas'
for c in contours:
    cv2.drawContours(canvas, [c], -1, color, thickness)  # -1 draws every contour in the list; [c] holds just one
    area = cv2.contourArea(c)  # area of one contour
    perimeter = cv2.arcLength(c, True)  # perimeter of one (closed) contour
    print("Perimeter: {}, Area: {}".format(perimeter, area))

    M = cv2.moments(c)
    if M['m00'] == 0:  # degenerate contour with zero area has no centroid
        continue
    cx = int(M['m10'] / M['m00'])
    cy = int(M['m01'] / M['m00'])
    cv2.circle(canvas, (cx, cy), 4, color, -1)  # draw the centroid as a small filled circle

    cv2.imshow("Centroids", canvas)
cv2.waitKey(0)
cv2.destroyAllWindows()
 
コード例 #30
0
def read_grid(image):

    sudoku = ""
    side = image.shape[0] // 9  # this is a square image - side will be 50
    offset = 0.1 * side  # for borders

    for i in range(9):
        for j in range(9):

            # coordinates of the corners of one small box
            top = max(int(round(side * i + offset)), 0)
            left = max(int(round(side * j + offset)), 0)
            right = min(int(round(side * (j + 1) - offset)), image.shape[1])   # clamp x to the image width
            bottom = min(int(round(side * (i + 1) - offset)), image.shape[0])  # clamp y to the image height

            # finding the contour inside the box
            crop = image[top:bottom, left:right]
            gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (7, 7), 0)
            thresh = cv2.adaptiveThreshold(blurred, 255,
                                           cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY_INV, 11, 2)
            contours, _ = cv2.findContours(thresh, cv2.RETR_TREE,
                                           cv2.CHAIN_APPROX_SIMPLE)

            # finding the largest valid contour
            max_area = 1
            largest_contour = None
            for cnt in contours:
                area = cv2.contourArea(cnt)
                if area > side * side * 0.04 and area > max_area:
                    max_area = area
                    largest_contour = cnt

            # if there is no contour, then the box must be empty
            if largest_contour is None:
                sudoku += "0"
                continue

            # crop out the largest contour i.e. the digit
            rect = cv2.minAreaRect(largest_contour)
            box = cv2.boxPoints(rect)
            box = box.astype(np.intp)  # np.int0 is an alias removed in newer NumPy

            minx = max(min(box, key=lambda g: g[0])[0] - 3, 0)
            miny = max(min(box, key=lambda g: g[1])[1] - 3, 0)
            maxx = min(max(box, key=lambda g: g[0])[0] + 3, int(side))
            maxy = min(max(box, key=lambda g: g[1])[1] + 3, int(side))

            # gray image of only the number
            number_image = gray[miny:maxy, minx:maxx]
            # OCR
            custom_config = r' --psm 6 -c tessedit_char_whitelist=123456789'
            text = pt.image_to_string(number_image, config=custom_config)

            # if i == 7 and j == 3:
            #     cv2.imshow("cropped",number_image)
            #     print(text)

            try:
                num = int(text)
                sudoku += str(num)
            except ValueError:
                # if there is a big contour but it's not a digit,
                # this frame is invalid, so ignore it
                return None

    return sudoku
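A hypothetical usage, assuming `warped` is the square, upright grid image an earlier warp step produced (the file name is our placeholder):

warped = cv2.imread("warped_grid.png")  # hypothetical input
puzzle = read_grid(warped)
if puzzle is not None:
    for r in range(9):
        print(puzzle[r * 9:(r + 1) * 9])  # one row of the 81-character string per line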