Ejemplo n.º 1
0
    def DetectTable():
        """Download a page image, detect the tables in it and store the result.

        The image URL is hard-coded. The downloaded file is saved under
        ``ExtracedTables``, pre-processed (gamma adjustment, thresholding,
        structure extraction), and every contour accepted by
        ``tableutils.verify_table`` becomes a ``TableStructure`` carrying its
        sorted joint coordinates.

        Returns:
            Whatever ``TableExtraction.StoreExtractedTable`` returns for the
            extracted table image(s).
        """
        # Kept from the original; the parsed value is not used below.
        # NOTE(review): accessing request.json may have request-parsing side
        # effects -- confirm before removing.
        content = request.json
        download_url = "https://tableextractor.blob.core.windows.net/extracted-images/1557478921912_Page%2001.png"

        # The image name is the last path segment of the URL.
        imageName = download_url.split('/')[-1]
        local_path = "ExtracedTables\\" + imageName

        # Context managers close the HTTP response and the file even on error.
        with urllib.request.urlopen(download_url) as response:
            payload = response.read()
        with open(local_path, 'wb') as file:
            file.write(payload)

        image = cv.imread(local_path, 1)
        # NOTE(review): this is an alias, not a copy. It only stays untouched
        # if the pre-processing steps below do not modify their input in place.
        imageCopy = image
        ippObj = ipp.ImagePreProcessing()
        image = ippObj.GammaAdujst(image)
        image = ippObj.Threshholding(image, 21)
        (contours, intersections) = ippObj.StructureExtraction(image, 10)

        # Get tables from the images
        tables = []  # list of tables
        for contour in contours:
            (rect, table_joints) = tableutils.verify_table(contour, intersections)
            # Identity checks: "== None" is ambiguous (and can raise) when the
            # value is a numpy array.
            if rect is None or table_joints is None:
                continue

            # Create a new instance of a table
            table = TableStructure(rect[0], rect[1], rect[2], rect[3])

            # Get an n-dimensional array of the coordinates of the table joints
            joint_coords = np.asarray([joint[0][0] for joint in table_joints])

            # np.lexsort sorts by the LAST key first: primary key is column 1,
            # ties are broken by column 0.
            sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
            joint_coords = joint_coords[sorted_indices]

            # Store joint coordinates in the table instance
            table.set_joints(joint_coords)
            tables.append(table)

        te = TableExtraction()
        tables = te.tableSort(tables)
        images = te.ExtractTable(imageCopy, tables)
        # Overwrites the downloaded file with the extraction result.
        cv.imwrite("ExtracedTables/" + imageName, images)
        result = te.StoreExtractedTable(images)
        return result
Ejemplo n.º 2
0
def extract(image):
    """Locate table regions in ``image`` and return them as built ``Table`` objects.

    Each external contour of the grid mask is checked with ``verify_table``.
    Accepted regions are cropped and warped, padded with an artificial
    border, and analysed again for line intersections. Regions with fewer
    than five intersection points are dropped.

    Args:
        image: Source image passed to ``get_grid_mask`` and ``crop_and_warp``.

    Returns:
        list: One ``Table`` (already ``build()``-ed) per accepted region.
    """
    grid_mask, horizontal_lines, vertical_lines = get_grid_mask(image)
    outlines, _ = cv.findContours(
        grid_mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

    # Pixels where horizontal and vertical lines overlap are candidate joints.
    line_crossings = cv.bitwise_and(horizontal_lines, vertical_lines)

    found_tables = []
    for outline in outlines:
        # Skip regions of interest that do not qualify as a table.
        if verify_table(outline, line_crossings) is None:
            continue

        warped = crop_and_warp(image, find_corners_from_contour(outline))

        # Some images lack outer borders, which would make the outer columns
        # disappear; add an artificial border around the warped table.
        padded = add_border_padding(warped,
                                    w=(2, 2, 2, 4),
                                    color=(100, 100, 100))

        # Written for every accepted region, so the file holds the last one.
        cv.imwrite('out/final_image.jpg', padded)

        # Recompute the grid on the warped table to find its own joints.
        _, table_horizontal, table_vertical = get_grid_mask(padded)
        crossings_in_table = cv.bitwise_and(table_horizontal, table_vertical)
        joint_points = find_intersection_mean_cords(crossings_in_table)

        if len(joint_points) >= 5:
            new_table = Table(padded, joint_points)
            new_table.build()
            found_tables.append(new_table)

    return found_tables
Ejemplo n.º 3
0
    def get_cells(self,
                  ori_img,
                  table_coords,
                  debug=False) -> List[np.ndarray]:
        """Detect cell boxes inside each given table region using line morphology.

        For every box in ``table_coords`` the region is cropped from
        ``ori_img``, adaptively thresholded, reduced to its horizontal and
        vertical lines, and every contour accepted by ``utils.verify_table``
        is turned into a ``TraditionalTable`` whose entries are emitted as
        four-corner boxes.

        Args:
            ori_img: Image indexed as ``[y, x]``; converted with
                ``cv.COLOR_BGR2GRAY`` after cropping.
            table_coords: Iterable of ``(xmin, ymin, xmax, ymax)`` boxes;
                values are cast to ``int``.
            debug: When true, show the intermediate images in OpenCV windows
                and wait for a key press after each one.

        Returns:
            One array per entry of ``table_coords``. Each row is a cell as
            ``[x0, y0, x1, y0, x1, y1, x0, y1]``.
            NOTE(review): the coordinates appear to be relative to the cropped
            table image; no shift by ``(xmin, ymin)`` is applied here.
        """
        # NOTE: the original author left a disabled guard here:
        # raise RuntimeError("This module is not fully debugged yet! "
        #                    "Please choose another way to obtain table boxes!")

        def _show(title, img):
            # Display one intermediate image and block until a key is pressed.
            cv.namedWindow(title, 0)
            cv.resizeWindow(title, 900, 700)
            cv.imshow(title, img)
            cv.waitKey(0)

        cells = []
        for coord in table_coords:  # for each bordered table
            table_cell = []
            xmin, ymin, xmax, ymax = [int(k) for k in coord]  # used for cropping
            table_img = ori_img[ymin:ymax, xmin:xmax]
            with utils.Timer("Traditional Cell Detection"):
                grayscale = cv.cvtColor(table_img, cv.COLOR_BGR2GRAY)
                # Threshold the inverted image so that dark lines become foreground.
                filtered = cv.adaptiveThreshold(~grayscale,
                                                self.MAX_THRESHOLD_VALUE,
                                                cv.ADAPTIVE_THRESH_MEAN_C,
                                                cv.THRESH_BINARY,
                                                self.BLOCK_SIZE,
                                                self.THRESHOLD_CONSTANT)
                if debug:
                    _show('filtered', filtered)

                # Isolate horizontal and vertical lines on separate copies.
                horizontal = filtered.copy()
                vertical = filtered.copy()
                horizontal_size = int(horizontal.shape[1] / self.SCALE)
                horizontal_structure = cv.getStructuringElement(
                    cv.MORPH_RECT, (horizontal_size, 1))
                utils.isolate_lines(horizontal, horizontal_structure)

                vertical_size = int(vertical.shape[0] / self.SCALE)
                vertical_structure = cv.getStructuringElement(
                    cv.MORPH_RECT, (1, vertical_size))
                utils.isolate_lines(vertical, vertical_structure)

                mask = horizontal + vertical
                if debug:
                    _show('mask', mask)

                (contours, _) = cv.findContours(mask, cv.RETR_EXTERNAL,
                                                cv.CHAIN_APPROX_SIMPLE)

                # Overlap of both line images marks candidate table joints.
                intersections = cv.bitwise_and(horizontal, vertical)
                if debug:
                    _show('intersections', intersections)

                for contour in contours:
                    # Verify that region of interest is a table
                    (rect, table_joints) = utils.verify_table(
                        contour, intersections)
                    # Identity checks on both values: "== None" is ambiguous
                    # (and can raise) when the value is a numpy array.
                    if rect is None or table_joints is None:
                        continue

                    # Create a new instance of a table
                    table = TraditionalTable(rect[0], rect[1], rect[2],
                                             rect[3])

                    # Get an n-dimensional array of the coordinates of the table joints
                    joint_coords = np.asarray(
                        [joint[0][0] for joint in table_joints])

                    # np.lexsort sorts by the LAST key first: primary key is
                    # column 1, ties are broken by column 0.
                    # Example of unsorted joint_coords:
                    # [[913 179], [695 179], [548 179], [285 179], [182 179],
                    # [ 72 179], [913 119], [695 119], [548 119], [285 119],
                    # [182 119], [ 72 119], [913  31], [695  31], [548  31],
                    # [457  31], [376  31], [285  31], [182  31], [ 72  31],
                    # [913   0], [695   0], [548   0], ... ]
                    sorted_indices = np.lexsort(
                        (joint_coords[:, 0], joint_coords[:, 1]))
                    joint_coords = joint_coords[sorted_indices]

                    # Store joint coordinates in the table instance
                    table.set_joints(joint_coords)

                    # Expand every xyxy entry into its four corners (xyxyxyxy).
                    for row in table.get_table_entries():
                        for entry in row:
                            table_cell.append([
                                entry[0], entry[1], entry[2], entry[1],
                                entry[2], entry[3], entry[0], entry[3]
                            ])
            cells.append(np.array(table_cell))

        return cells
Ejemplo n.º 4
0
# Create an image mask with just the horizontal
# and vertical lines in the image. Then find
# all contours in the mask.
mask = horizontal + vertical
# cv.findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x/4.x. The contours are always the
# second-to-last element, so index from the end instead of unpacking.
contours = cv.findContours(mask, cv.RETR_EXTERNAL,
                           cv.CHAIN_APPROX_SIMPLE)[-2]

# Find intersections between the lines
# to determine if the intersections are table joints.
intersections = cv.bitwise_and(horizontal, vertical)

# Get tables from the images
tables = []  # list of tables
for contour in contours:
    # Verify that region of interest is a table
    (rect, table_joints) = utils.verify_table(contour, intersections)
    # Identity checks: "== None" is ambiguous (and can raise) when the value
    # is a numpy array.
    if rect is None or table_joints is None:
        continue

    # Create a new instance of a table
    table = Table(rect[0], rect[1], rect[2], rect[3])

    # Get an n-dimensional array of the coordinates of the table joints
    joint_coords = np.asarray([joint[0][0] for joint in table_joints])

    # Returns indices of coordinates in sorted order.
    # np.lexsort sorts by the LAST key first: primary key is column 1,
    # ties are broken by column 0.
    sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
Ejemplo n.º 5
0
def conversion_algorithm(path):
    """Detect tables in the scanned image at ``path`` and save debug images.

    The image is scanned and thresholded by ``scan_img``, its horizontal and
    vertical lines are isolated morphologically, and every contour accepted
    by ``utils.verify_table`` becomes a ``Table`` with sorted joints. The
    line mask is written to ``processing_data/detected_lines.jpg``. When at
    least one table is found, the warped image with table rectangles drawn
    on it is written to ``processing_data/table_boundaries.jpg``.

    Args:
        path: Forwarded unchanged to ``scan_img``.

    Returns:
        Nothing explicit in the visible code; the results are the files
        written and the rectangles drawn on the warped image.
    """
    # scanning the image, applying perspective warping
    # and adaptive thresholding
    filtered, warped = scan_img(path)

    # line isolation
    SCALE = 23

    # isolate horizontal and vertical lines using morphological operations
    horizontal = filtered.copy()
    vertical = filtered.copy()

    horizontal_size = int(horizontal.shape[1] / SCALE)
    horizontal_structure = cv.getStructuringElement(cv.MORPH_RECT,
                                                    (horizontal_size, 1))
    utils.isolate_lines(horizontal, horizontal_structure)

    vertical_size = int(vertical.shape[0] / SCALE)
    vertical_structure = cv.getStructuringElement(cv.MORPH_RECT,
                                                  (1, vertical_size))
    utils.isolate_lines(vertical, vertical_structure)

    # TABLE EXTRACTION
    # create an image mask with just the horizontal
    # and vertical lines in the image. Then find
    # all contours in the mask.
    mask = horizontal + vertical
    cv.imwrite("processing_data/detected_lines.jpg", mask)
    (contours, _) = cv.findContours(mask, cv.RETR_EXTERNAL,
                                    cv.CHAIN_APPROX_SIMPLE)

    # find intersections between the lines
    # to determine if the intersections are table joints.
    intersections = cv.bitwise_and(horizontal, vertical)

    # get tables from the images
    tables = []  # list of tables
    for contour in contours:
        # verify that region of interest is a table
        (rect, table_joints) = utils.verify_table(contour, intersections)
        # identity checks: "== None" is ambiguous (and can raise) when the
        # value is a numpy array
        if rect is None or table_joints is None:
            continue

        # create a new instance of a table
        table = Table(rect[0], rect[1], rect[2], rect[3])

        # get an n-dimensional array of the coordinates of the table joints
        joint_coords = np.asarray([joint[0][0] for joint in table_joints])

        # np.lexsort sorts by the LAST key first: primary key is column 1,
        # ties are broken by column 0
        sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
        joint_coords = joint_coords[sorted_indices]

        # store joint coordinates in the table instance
        table.set_joints(joint_coords)

        tables.append(table)

        # outline the detected table on the warped image
        cv.rectangle(warped, (table.x, table.y),
                     (table.x + table.w, table.y + table.h), (0, 255, 0), 1, 8,
                     0)

    # write the annotated image once instead of after every table; as before,
    # nothing is written when no table was found
    if tables:
        cv.imwrite("processing_data/table_boundaries.jpg", warped)
Ejemplo n.º 6
0
def find_table(image):
    """Detect tables in ``image`` and return them with sorted joint coordinates.

    The image is adaptively thresholded, its horizontal and vertical lines
    are isolated morphologically, and every external contour of the combined
    line mask that ``utils.verify_table`` accepts becomes a ``Table``.

    Args:
        image: Image array. A 3-dimensional array is converted with
            ``cv.COLOR_BGR2GRAY``; any other shape is used as-is and is
            assumed to be single-channel already.

    Returns:
        list: The detected ``Table`` objects; empty when none are found.
    """
    # A 3-dimensional array carries a channel axis and must be converted.
    # Otherwise use the image directly: the original left ``grayscale``
    # unbound here, which raised UnboundLocalError for grayscale input.
    NUM_CHANNELS = 3
    if len(image.shape) == NUM_CHANNELS:
        grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    else:
        grayscale = image

    # =====================================================
    # IMAGE FILTERING (using adaptive thresholding)
    # =====================================================
    MAX_THRESHOLD_VALUE = 255
    BLOCK_SIZE = 15
    THRESHOLD_CONSTANT = 0

    # Filter the inverted image so that dark lines become foreground
    filtered = cv.adaptiveThreshold(~grayscale, MAX_THRESHOLD_VALUE,
                                    cv.ADAPTIVE_THRESH_MEAN_C,
                                    cv.THRESH_BINARY, BLOCK_SIZE,
                                    THRESHOLD_CONSTANT)

    # =====================================================
    # LINE ISOLATION
    # =====================================================
    SCALE = 15

    # Isolate horizontal and vertical lines using morphological operations
    horizontal = filtered.copy()
    vertical = filtered.copy()

    horizontal_size = int(horizontal.shape[1] / SCALE)
    horizontal_structure = cv.getStructuringElement(cv.MORPH_RECT,
                                                    (horizontal_size, 1))
    utils.isolate_lines(horizontal, horizontal_structure)

    vertical_size = int(vertical.shape[0] / SCALE)
    vertical_structure = cv.getStructuringElement(cv.MORPH_RECT,
                                                  (1, vertical_size))
    utils.isolate_lines(vertical, vertical_structure)

    # =====================================================
    # TABLE EXTRACTION
    # =====================================================
    # Create an image mask with just the horizontal
    # and vertical lines in the image. Then find
    # all contours in the mask.
    mask = horizontal + vertical
    (contours, _) = cv.findContours(mask, cv.RETR_EXTERNAL,
                                    cv.CHAIN_APPROX_SIMPLE)

    # Find intersections between the lines
    # to determine if the intersections are table joints.
    intersections = cv.bitwise_and(horizontal, vertical)

    # Get tables from the images
    tables = []  # list of tables
    for contour in contours:
        # Verify that region of interest is a table
        (rect, table_joints) = utils.verify_table(contour, intersections)
        # Identity checks: "== None" is ambiguous (and can raise) when the
        # value is a numpy array.
        if rect is None or table_joints is None:
            continue

        # Create a new instance of a table
        table = Table(rect[0], rect[1], rect[2], rect[3])

        # Get an n-dimensional array of the coordinates of the table joints
        joint_coords = np.asarray([joint[0][0] for joint in table_joints])

        # np.lexsort sorts by the LAST key first: primary key is column 1,
        # ties are broken by column 0.
        sorted_indices = np.lexsort((joint_coords[:, 0], joint_coords[:, 1]))
        joint_coords = joint_coords[sorted_indices]

        # Store joint coordinates in the table instance
        table.set_joints(joint_coords)

        tables.append(table)

    # ``tables`` is already an empty list when nothing was detected.
    return tables