Code Example #1
import cv2
import numpy as np


class SudokuExtractor:
    def __init__(self, image=None):
        # DigitRecognizer and Helper are project-local classes; their imports
        # are assumed to be available alongside this module.
        if image is not None:
            self.image = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
        else:
            self.image = None
        self.model = DigitRecognizer()
        self.model.load_model('num_reader')
        self.digits = None
        self.grid = None
        self.helper = Helper()

    def load_image(self, image_dst):
        """Loads image"""
        self.image = cv2.imread(image_dst, cv2.IMREAD_GRAYSCALE)

    def extract_puzzle(self):
        """Runs the full pipeline: preprocess, locate and warp the grid, then read the digits."""
        cropped_image = self.warp_image(
            self.find_grid(self.pre_process_image(self.image)))
        squares = self.helper.infer_grid(cropped_image)
        self.digits = self.get_digits(cropped_image, squares, 28)
        self.grid = self.get_grid()

    def pre_process_image(self, image, skip_dilate=False):
        """use blur, threshold and dilation to get the main features of the image"""

        # Gaussian blur with a kernal size (height, width) of 9.
        # Note that kernal sizes must be positive and odd and the kernel must be square.
        processed_img = cv2.GaussianBlur(image.copy(), (9, 9), 0)

        # Adaptive threshold using 11 nearest neighbour pixels
        processed_img = cv2.adaptiveThreshold(processed_img, 255,
                                              cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                              cv2.THRESH_BINARY, 11, 2)

        # Invert colours, so gridlines have non-zero pixel values.
        # Necessary to dilate the image, otherwise will look like erosion instead.
        processed_img = cv2.bitwise_not(processed_img, processed_img)

        if not skip_dilate:
            kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], np.uint8)
            processed_img = cv2.dilate(processed_img, kernel)
        return processed_img

    def find_grid(self, processed_img):
        """find corners of largest contour which (hopefully) is the grid of the puzzle"""
        #mode CV_RETR_EXTERNAL retrieves only the extreme outer contours
        #method CV_CHAIN_APPROX_SIMPLE compresses horizontal, vertical, and diagonal segments and leaves only their end points
        new_image, contours, hierarchy = cv2.findContours(
            processed_img.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        #Sort contours from largest to smallest
        contours = sorted(contours, key=cv2.contourArea, reverse=True)
        grid = contours[0]

        #add together the x and y coordinates of each point of the largest contour
        #the index of the smallest sum is the top_left corner
        #the index of the largest sum is the bottom_right
        br_tl_list = [point[0][0] + point[0][1] for point in grid]
        top_left = br_tl_list.index(min(br_tl_list))
        bottom_right = br_tl_list.index(max(br_tl_list))

        #subtract the y coordinate from the x coordinate for each point of the largest contour
        #the index of the smallest difference is the bottom_left corner
        #the index of the largest is the top_right
        bl_tr_list = [point[0][0] - point[0][1] for point in grid]
        bottom_left = bl_tr_list.index(min(bl_tr_list))
        top_right = bl_tr_list.index(max(bl_tr_list))

        return [
            grid[top_left][0], grid[top_right][0], grid[bottom_right][0],
            grid[bottom_left][0]
        ]

    def warp_image(self, corners):
        """Crops and warps the quadrilateral described by the four corner points into a square of similar size."""
        # Corners are ordered top left, top right, bottom right, bottom left
        top_left, top_right, bottom_right, bottom_left = corners

        # Explicitly set the data type to float32 or `getPerspectiveTransform` will throw an error
        src = np.float32([[top_left, top_right, bottom_right, bottom_left]])

        # Get the longest side in the rectangle
        side = max([
            self.helper.distance_between(bottom_right, top_right),
            self.helper.distance_between(top_left, bottom_left),
            self.helper.distance_between(bottom_right, bottom_left),
            self.helper.distance_between(top_left, top_right)
        ])

        # Describe a square with sides of the calculated length; this is the new perspective to warp to
        dst = np.float32([[0, 0], [side - 1, 0], [side - 1, side - 1],
                          [0, side - 1]])

        # Gets the transformation matrix for skewing the image to fit a square by comparing the 4 before and after points
        M = cv2.getPerspectiveTransform(src, dst)

        return cv2.warpPerspective(self.image, M, (int(side), int(side)))

    def cut_from_rect(self, img, rect):
        """Cuts a rectangle from an image using the top left and bottom right points.

        Equivalent to region_of_interest = img[y1:y2, x1:x2].
        """
        return img[int(rect[0][1]):int(rect[1][1]),
                   int(rect[0][0]):int(rect[1][0])]

    def find_largest_feature(self, inp_img, scan_tl=None, scan_br=None):
        """
        Uses the fact the `floodFill` function returns a bounding box of the area it filled to find the biggest
        connected pixel structure in the image. Fills this structure in white, reducing the rest to black.
        """
        img = inp_img.copy()  # Copy the image, leaving the original untouched
        height, width = img.shape[:2]
        max_area = 0
        seed_point = (None, None)  #Starting point

        if scan_tl is None:
            scan_tl = [0, 0]

        if scan_br is None:
            scan_br = [width, height]

        # Loop through the scan region
        for x in range(scan_tl[0], scan_br[0]):
            for y in range(scan_tl[1], scan_br[1]):
                # Only operate on white pixels; check the bounds before reading
                # the pixel (note that .item() takes its indices as y, x)
                if x < width and y < height and img.item(y, x) == 255:
                    # floodFill returns (area, image, mask, rect); fill the
                    # candidate feature with grey (64) and track the largest one
                    area = cv2.floodFill(img, None, (x, y), 64)
                    if area[0] > max_area:
                        max_area = area[0]
                        seed_point = (x, y)

        # Colour everything that is still white black (the largest feature was
        # filled with grey above, so it is left untouched here)
        for x in range(width):
            for y in range(height):
                if img.item(y, x) == 255:
                    cv2.floodFill(img, None, (x, y), 0)

        # Fill the largest feature back in with white, provided a seed point was found
        if all([p is not None for p in seed_point]):
            cv2.floodFill(img, None, seed_point, 255)

        top, bottom, left, right = height, 0, width, 0
        #Find the bounding parameters
        for x in range(width):
            for y in range(height):
                if img.item(y, x) == 255:
                    top = y if y < top else top
                    bottom = y if y > bottom else bottom
                    left = x if x < left else left
                    right = x if x > right else right
        #cv2.rectangle(img, (left, top), (right, bottom),(255, 0, 0))

        bounding_box = [(left, top), (right, bottom)]
        return bounding_box, seed_point

    def extract_digit(self, img, rect, size):
        """Extracts a digit (if one exists) from a Sudoku square."""
        digit_square = self.cut_from_rect(
            img, rect)  # Get the digit box from the whole square

        # Use flood-fill feature finding to get the largest feature in the middle of the box.
        # The margin defines a central area where we expect to find a pixel belonging to the digit.
        h, w = digit_square.shape[:2]
        centre = int(np.mean([h, w]) / 2.5)
        #display_points(digit_square, [[centre, centre],[w - centre, h - centre]])
        bounding_box, seed = self.find_largest_feature(
            digit_square, [centre, centre], [w - centre, h - centre])
        digit = self.cut_from_rect(digit_square, bounding_box)

        w_b = bounding_box[1][0] - bounding_box[0][0]
        h_b = bounding_box[1][1] - bounding_box[0][1]
        # Only treat the feature as a digit if it is reasonably large (more
        # than 100 px of area and at least 15 rows tall); otherwise return an
        # empty square.
        if w_b > 0 and h_b > 0 and (w_b * h_b) > 100 and len(digit) > 14:
            return self.centralize_digit(digit, bounding_box, [h, w])
        else:
            return np.zeros((size, size), np.uint8)

    def image_centre(self, rectangle):
        """Returns the (x, y) centre of an image."""
        h, w = rectangle.shape[:2]
        tl = (0, 0)
        br = (w, h)
        x = (br[0] + tl[0]) / 2
        y = (br[1] + tl[1]) / 2
        centre = (x, y)
        return centre

    def centralize_digit(self, digit, bbox, size_of_dst):
        # Calculate the centre of the digit image
        digit_x, digit_y = self.image_centre(digit)[:2]

        # Calculate the centre of the destination image
        dst = np.zeros((size_of_dst[0], size_of_dst[1]), np.uint8)
        dst_x, dst_y = self.image_centre(dst)

        # Calculate the offset needed to centre the digit in the destination
        x_offset = int(dst_x - digit_x)
        y_offset = int(dst_y - digit_y)

        # Paste the digit into the destination with a slice assignment
        h, w = digit.shape[:2]

        #cv2.rectangle(dst, (int(x_offset), int(y_offset)), (int(x_offset+w), int(y_offset+h)),(255, 0, 0))
        img = dst.copy()
        img[y_offset:y_offset + digit.shape[0],
            x_offset:x_offset + digit.shape[1]] = digit
        return cv2.resize(img, (28, 28))

    def get_digits(self, img, squares, size):
        """Extracts digits from their cells and build an array"""
        digits = []
        img = self.pre_process_image(img, skip_dilate=True)
        for square in squares:
            digits.append(self.extract_digit(img, square, size))
        return digits

    def get_grid(self):
        """Runs the digit recogniser over the extracted cells and builds the 9x9 grid."""
        prediction = self.model.make_prediction(self.digits)
        grid = []
        row = []
        counter = 0
        for i in range(1, 82):
            if (self.digits[i - 1].any()):
                if prediction[counter] == 0:
                    row.append(' ')
                    counter += 1
                else:
                    row.append(prediction[counter])
                    counter += 1
            else:
                row.append(' ')
            if i % 9 == 0:  # a full row of 9 cells has been collected
                grid.append(row)
                row = []
        return grid

    def print_sudoku(self):
        print("+" + "---+" * 9)
        for i, row in enumerate(self.grid):
            print(("|" + " {}   {}   {} |" * 3).format(
                *[x if x != 0 else " " for x in row]))
            if i % 3 == 2:
                print("+" + "---+" * 9)
            else:
                print("+" + "   +" * 9)

    def show_extraction(self):
        """Visualises each stage of the extraction pipeline using the Helper display methods."""
        processed = self.pre_process_image(self.image)
        corners = self.find_grid(processed)
        self.helper.display_points(processed, corners)
        cropped = self.warp_image(corners)
        self.helper.show_image(cropped)
        squares = self.helper.infer_grid(cropped)
        self.helper.display_rects(cropped, squares)
        self.digits = self.get_digits(cropped, squares, 28)
        self.helper.show_digits(self.digits)
        self.grid = self.get_grid()
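

A minimal usage sketch follows; the image path 'sudoku.jpg' is a placeholder, and DigitRecognizer, Helper and the trained 'num_reader' model are assumed to be available alongside the class, as in the code above.

if __name__ == '__main__':
    # Load the photo in greyscale, run the full extraction pipeline and print
    # the recognised 9x9 grid. show_extraction() can be called instead to
    # visualise every intermediate stage.
    extractor = SudokuExtractor('sudoku.jpg')  # hypothetical input image
    extractor.extract_puzzle()
    extractor.print_sudoku()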