def test_matrix(self):
    """Check selected neighbor-matrix entries for four fixed rectangles.

    Builds a ``NeighborGraphBuilder`` from four token rectangles placed on
    a blank 1000x1000 image and compares individual entries of the matrix
    returned by ``get_neighbor_matrix()`` against hard-coded values.
    """
    # NOTE(review): other callers in this file unpack two values from
    # get_neighbor_matrix(); this test indexes the result directly --
    # confirm which version of the builder API it targets.
    rectangles = [
        {'x': 500, 'width': 50, 'y': 500, 'height': 100},
        {'x': 300, 'width': 100, 'y': 500, 'height': 100},
        {'x': 500, 'width': 100, 'y': 300, 'height': 100},
        {'x': 500, 'width': 100, 'y': 700, 'height': 100},
    ]
    blank_image = np.zeros((1000, 1000))
    m = NeighborGraphBuilder(rectangles, blank_image).get_neighbor_matrix()

    # ((row, column), expected value) pairs, checked in this order.
    expected_entries = (
        ((0, 0), 1),
        ((0, 1), 2),
        ((0, 3), 3),
        ((0, 2), -1),
        ((1, 0), -1),
        ((1, 1), -1),
        ((1, 2), 0),
        ((1, 3), -1),
    )
    for (row, column), value in expected_entries:
        assert (m[row, column] == value)
Esempio n. 2
0
    def dump_doc(self, all_tokens, all_tokens_rects, image, file_name):
        """Assemble per-token features for a document and pickle them.

        For every token this stores a binary class (0 when ``image`` is 0 at
        the token's midpoint, otherwise 1), the token rectangle divided by
        the image width/height, and a 300-element embedding obtained from
        ``self.glove_reader`` (a vector of -1 when the reader returns
        ``None``). The neighbor graph and distances come from
        ``NeighborGraphBuilder``; distance columns 0 and 2 are divided by the
        image width and columns 1 and 3 by the image height.

        Args:
            all_tokens: Sequence of tokens passed to the GloVe reader.
            all_tokens_rects: Per-token mappings with ``x``, ``y``, ``width``
                and ``height`` keys, indexed in step with ``all_tokens``.
            image: Two-dimensional array indexed as ``image[row, column]``.
            file_name: Path of the pickle file to write.
        """
        token_count = len(all_tokens)
        height, width = np.shape(image)
        classes = np.zeros(token_count)
        rect_matrix = np.zeros((token_count, 4))
        embeddings_matrix = np.zeros((token_count, 300))

        for position, token in enumerate(all_tokens):
            rect = all_tokens_rects[position]
            center_row = int(rect['y'] + rect['height'] / 2)
            center_col = int(rect['x'] + rect['width'] / 2)
            if image[center_row, center_col] == 0:
                classes[position] = 0
            else:
                classes[position] = 1

            rect_matrix[position, 0] = rect['x'] / width
            rect_matrix[position, 1] = rect['y'] / height
            rect_matrix[position, 2] = rect['width'] / width
            rect_matrix[position, 3] = rect['height'] / height

            vector = self.glove_reader.get_vector(token)
            if vector is None:
                # Placeholder used when the reader has no vector for a token.
                vector = np.ones((300)) * (-1)
            embeddings_matrix[position] = vector

        builder = NeighborGraphBuilder(all_tokens_rects, image)
        neighbor_graph, neighbor_distance_matrix = builder.get_neighbor_matrix()
        # Columns 0/2 are scaled by the width, columns 1/3 by the height.
        for column, extent in enumerate((width, height, width, height)):
            neighbor_distance_matrix[:, column] = (
                neighbor_distance_matrix[:, column] / extent)

        document = DocumentFeatures(embeddings_matrix, rect_matrix,
                                    neighbor_distance_matrix, neighbor_graph,
                                    classes)
        with open(file_name, 'wb') as handle:
            pickle.dump(document, handle, pickle.HIGHEST_PROTOCOL)
    def see_table(self, table, increment):
        """Rasterise one table's annotations and dump per-token relations.

        The table bounding box is read from ``table.attrib`` (``x0``, ``y0``,
        ``x1``, ``y1``). Three integer label planes of size
        ``(self.rows, self.cols)`` are painted from the ``Row``, ``Column``
        and ``Cell`` children of ``table`` (plane 0 = row id, plane 1 =
        column id, plane 2 = cell id). Tokens from ``self.all_tokens`` whose
        midpoint lies on a non-zero row label are kept, and for those tokens
        the method computes:

        * N x N matrices telling whether two tokens share a row, column or
          cell label, and
        * N x 4 matrices telling the same for each token and its neighbor in
          each of the four columns of the neighbor graph (entries stay 0
          where the graph holds -1).

        Everything is passed to ``self.dump_table`` together with the
        feature map produced by ``ConvolutionalAutoencoder`` for the cropped
        table image, and a visualisation is written with ``cv2.imwrite``.

        Args:
            table: Element exposing ``attrib`` and ``findall`` (presumably an
                ``xml.etree`` element -- confirm against the caller).
            increment: Integer appended to ``self.sorted_path`` (as ``-%d``)
                to form the output directory name.
        """
        print("Converting doc", self.png_path)

        table_attributes = table.attrib
        tx1 = int(table_attributes['x0'])
        ty1 = int(table_attributes['y0'])
        tx2 = int(table_attributes['x1'])
        ty2 = int(table_attributes['y1'])

        model = ConvolutionalAutoencoder()
        model.prepare_for_manual_testing()

        image_table_cropped = self.image[ty1:ty2 + 1, tx1:tx2 + 1]
        spatial_features = model.get_feature_map(image_table_cropped).astype(
            np.float64)

        sorted_path_full = self.sorted_path + "-%d" % increment
        # NOTE(review): the directory is only created when the module-level
        # ``dont_output`` flag is false, yet the files below are written
        # unconditionally -- confirm this is intended.
        if not dont_output:
            if not os.path.exists(sorted_path_full):
                os.mkdir(sorted_path_full)

        # Plane 0 = row id, plane 1 = column id, plane 2 = cell id.
        data_image = np.zeros((self.rows, self.cols, 3), dtype=np.int32)

        # Paint horizontal bands: each Row element closes the band that
        # started at ``last_y`` and gives it the next row id.
        rr = 0
        last_y = ty1
        for row in table.findall('Row'):
            row_attrib = row.attrib
            x1 = int(row_attrib['x0'])
            y1 = int(row_attrib['y0'])
            x2 = int(row_attrib['x1'])
            rr += 1
            data_image[last_y:y1 + 1, x1:x2 + 1, 0] = rr
            last_y = y1 + 1
        # NOTE(review): the trailing band reuses ``rr``, i.e. the same id as
        # the last band painted inside the loop -- confirm that the final
        # two bands are meant to share a label.
        data_image[last_y:ty2, tx1:tx2 + 1, 0] = rr

        # Paint vertical bands the same way from the Column elements.
        cc = 0
        last_x = tx1
        for col in table.findall('Column'):
            col_attrib = col.attrib
            x1 = int(col_attrib['x0'])
            y1 = int(col_attrib['y0'])
            y2 = int(col_attrib['y1'])
            cc += 1
            data_image[y1:y2 + 1, last_x:x1 + 1, 1] = cc
            last_x = x1 + 1
        data_image[ty1:ty2, last_x:tx2, 1] = cc

        # Each Cell not flagged ``dontCare`` gets its own id on plane 2.
        ll = 0
        for cell_xml in table.findall('Cell'):
            bounding_box = cell_xml.attrib
            if bounding_box['dontCare'] == 'true':
                continue
            x1 = int(bounding_box['x0'])
            y1 = int(bounding_box['y0'])
            x2 = int(bounding_box['x1'])
            y2 = int(bounding_box['y1'])
            ll += 1
            data_image[y1:y2 + 1, x1:x2 + 1, 2] = ll
        show_1 = ((data_image[:, :] * 100) % 256).astype(np.uint8)

        def center(rect):
            """Return the integer (x, y) midpoint of a token rectangle."""
            return (int(rect['x'] + rect['width'] / 2),
                    int(rect['y'] + rect['height'] / 2))

        # Keep only the tokens whose midpoint carries a non-zero row label.
        all_tokens = []
        all_tokens_rects = []
        centers = []
        for i, token in enumerate(self.all_tokens):
            token_rect = self.all_tokens_rects[i]
            mid_x, mid_y = center(token_rect)
            if data_image[mid_y, mid_x, 0] == 0:
                continue
            all_tokens.append(token)
            all_tokens_rects.append(token_rect)
            centers.append((mid_x, mid_y))

        N = len(all_tokens)

        # labels[i] holds the (row id, column id, cell id) under token i.
        centers = np.array(centers, dtype=np.intp).reshape(N, 2)
        labels = data_image[centers[:, 1], centers[:, 0]]

        # Pairwise label equality via broadcasting: same_label[i, j, k] is
        # True when tokens i and j agree on plane k.
        same_label = labels[:, np.newaxis, :] == labels[np.newaxis, :, :]
        row_share_matrix = same_label[:, :, 0].astype(np.float64)
        col_share_matrix = same_label[:, :, 1].astype(np.float64)
        cell_share_matrix = same_label[:, :, 2].astype(np.float64)

        graph_builder = NeighborGraphBuilder(all_tokens_rects,
                                             data_image[:, :, 0])
        M, D = graph_builder.get_neighbor_matrix()

        neighbors_same_row = np.zeros((N, 4))
        neighbors_same_col = np.zeros((N, 4))
        neighbors_same_cell = np.zeros((N, 4))
        for i in range(N):
            for direction in range(4):
                neighbor = int(M[i, direction])
                if neighbor == -1:
                    # No neighbor in this direction: leave the flags at 0.
                    continue
                shared = labels[i] == labels[neighbor]
                neighbors_same_row[i, direction] = shared[0]
                neighbors_same_col[i, direction] = shared[1]
                neighbors_same_cell[i, direction] = shared[2]

        self.dump_table(all_tokens, all_tokens_rects, M, D, row_share_matrix,
                        col_share_matrix, cell_share_matrix,
                        neighbors_same_row, neighbors_same_col,
                        neighbors_same_cell, show_1, spatial_features,
                        os.path.join(sorted_path_full, '__dump__.pickle'))
        cv2.imwrite(os.path.join(sorted_path_full, 'visual.png'), show_1)
Esempio n. 4
0
    def dump_doc(self, all_tokens, all_tokens_rects, spatial_features,
                 file_name):
        """Build table-detection features for a document and pickle them.

        For each token the method records:

        * a binary class: 0 when ``self.image_tables`` is 0 at the token's
          midpoint, otherwise 1;
        * the token rectangle divided by the width/height of ``self.image``;
        * the ``spatial_features`` vector found at the token's midpoint after
          scaling it to the feature-map size;
        * a 300-element embedding from ``self.glove_reader`` (a vector of -1
          when the reader returns ``None``);
        * four flags, one per neighbor-graph column, set to 1 only when the
          token's own ``image_tables`` value is non-zero, the graph entry is
          not -1, and the neighbor's midpoint has a different
          ``image_tables`` value.

        A preview with one rectangle per token is written to ``visual.png``
        inside ``self.sorted_path`` before the features are pickled to
        ``file_name``.

        Args:
            all_tokens: Sequence of tokens passed to the GloVe reader.
            all_tokens_rects: Per-token mappings with ``x``, ``y``, ``width``
                and ``height`` keys.
            spatial_features: Three-dimensional array indexed as
                ``[row, column]`` yielding one feature vector.
            file_name: Path of the pickle file to write.
        """
        token_count = len(all_tokens)
        height, width, _ = np.shape(self.image)
        classes = np.zeros(token_count)
        inside_same_table = np.zeros((token_count, 4))
        rect_matrix = np.zeros((token_count, 4))
        embeddings_matrix = np.zeros((token_count, 300))

        (features_spatial_height, features_spatial_width,
         depth) = np.shape(spatial_features)

        conv_features = np.zeros((token_count, depth))

        def table_value(rect):
            """Return ``self.image_tables`` at the midpoint of ``rect``."""
            return self.image_tables[int(rect['y'] + rect['height'] / 2),
                                     int(rect['x'] + rect['width'] / 2)]

        graph_builder = NeighborGraphBuilder(all_tokens_rects,
                                             self.image_tables)

        if not dont_output and not os.path.exists(self.sorted_path):
            os.mkdir(self.sorted_path)

        (neighbor_graph,
         neighbor_distance_matrix) = graph_builder.get_neighbor_matrix()
        # Columns 0/2 are scaled by the width, columns 1/3 by the height.
        for column, extent in enumerate((width, height, width, height)):
            neighbor_distance_matrix[:, column] = (
                neighbor_distance_matrix[:, column] / extent)
        draw_image = np.copy(self.image)

        for position in range(token_count):
            rect = all_tokens_rects[position]
            index = table_value(rect)

            neighbor_ids = [
                int(neighbor_graph[position, side]) for side in range(4)
            ]
            # A graph entry of -1 picks the last rectangle here; the flag
            # computation below ignores that lookup via its ``== -1`` test.
            neighbor_rects = [all_tokens_rects[n] for n in neighbor_ids]

            if index == 0:
                differs = [0, 0, 0, 0]
            else:
                differs = [
                    0 if table_value(other) == index or n == -1 else 1
                    for other, n in zip(neighbor_rects, neighbor_ids)
                ]

            for side, flag in enumerate(differs):
                inside_same_table[position, side] = flag

            if any(flag != 0 for flag in differs):
                color = (0, 255, 0)
            elif index == 0:
                color = (0, 0, 255)
            else:
                color = (255, 0, 0)
            top_left = (int(rect['x']), int(rect['y']))
            bottom_right = (int(rect['x'] + rect['width']),
                            int(rect['y'] + rect['height']))
            cv2.rectangle(draw_image, top_left, bottom_right, color, 3)

        draw_path = os.path.join(self.sorted_path, 'visual.png')
        print(draw_path)
        cv2.imwrite(draw_path, draw_image)

        for position in range(token_count):
            rect = all_tokens_rects[position]
            if table_value(rect) == 0:
                classes[position] = 0
            else:
                classes[position] = 1

            rect_matrix[position, 0] = rect['x'] / width
            rect_matrix[position, 1] = rect['y'] / height
            rect_matrix[position, 2] = rect['width'] / width
            rect_matrix[position, 3] = rect['height'] / height

            # Token midpoint expressed in feature-map coordinates.
            feat_x = int(
                (rect_matrix[position, 0] + rect_matrix[position, 2] / 2) *
                features_spatial_width)
            feat_y = int(
                (rect_matrix[position, 1] + rect_matrix[position, 3] / 2) *
                features_spatial_height)

            assert feat_x < features_spatial_width and feat_y < features_spatial_height

            conv_features[position] = spatial_features[feat_y, feat_x]

            vector = self.glove_reader.get_vector(all_tokens[position])
            if vector is None:
                # Placeholder used when the reader has no vector for a token.
                vector = np.ones((300)) * (-1)
            embeddings_matrix[position] = vector

        document = TableDetectDocument(embeddings_matrix, rect_matrix,
                                       neighbor_distance_matrix,
                                       neighbor_graph, classes, conv_features,
                                       inside_same_table)
        with open(file_name, 'wb') as handle:
            pickle.dump(document, handle, pickle.HIGHEST_PROTOCOL)
Esempio n. 5
0
    def see_table(self, table, increment):
        """Convert one annotated table into 256x256 tensors and dump them.

        The table bounding box is read from ``table.attrib`` (``x0``, ``y0``,
        ``x1``, ``y1``). Three integer label planes of size
        ``(self.rows, self.cols)`` are painted from the ``Row``, ``Column``
        and ``Cell`` children of ``table``. Tokens from ``self.all_tokens``
        whose midpoint lies on a non-zero row label are kept; the method
        returns early, writing nothing, when no token qualifies.

        For each kept token a 308-element input vector (300 embedding values
        from ``self.glove_reader`` followed by 8 position/distance values
        divided by the table width/height) is placed at the token's
        top-left position scaled to a 256x256 grid. The output tensor stores,
        at the same position, four flags telling whether the token and its
        neighbor in each neighbor-graph column share a cell label.

        Debug images and a gzip-compressed pickle of a
        ``TableParseDocument`` are written below
        ``self.sorted_path + "-<increment>"``.

        Args:
            table: Element exposing ``attrib`` and ``findall`` (presumably an
                ``xml.etree`` element -- confirm against the caller).
            increment: Integer appended to ``self.sorted_path`` (as ``-%d``)
                to form the output directory name.
        """
        print("Converting doc", self.png_path)

        table_attributes = table.attrib
        tx1 = int(table_attributes['x0'])
        ty1 = int(table_attributes['y0'])
        tx2 = int(table_attributes['x1'])
        ty2 = int(table_attributes['y1'])

        image_table_cropped = self.image[ty1:ty2 + 1, tx1:tx2 + 1]

        # _, _, 0 = row share
        # _, _, 1 = column share
        # _, _, 2 = cell share
        data_image = np.zeros((self.rows, self.cols, 3), dtype=np.int32)

        # Paint horizontal bands: each Row element closes the band that
        # started at ``last_y`` and gives it the next row id.
        rr = 0
        last_y = ty1
        for row in table.findall('Row'):
            row_attrib = row.attrib
            x1 = int(row_attrib['x0'])
            y1 = int(row_attrib['y0'])
            x2 = int(row_attrib['x1'])
            rr += 1
            data_image[last_y:y1 + 1, x1:x2 + 1, 0] = rr
            last_y = y1 + 1
        # NOTE(review): the trailing band reuses ``rr``, i.e. the same id as
        # the last band painted inside the loop -- confirm that the final
        # two bands are meant to share a label.
        data_image[last_y:ty2, tx1:tx2 + 1, 0] = rr

        # Paint vertical bands the same way from the Column elements.
        cc = 0
        last_x = tx1
        for col in table.findall('Column'):
            col_attrib = col.attrib
            x1 = int(col_attrib['x0'])
            y1 = int(col_attrib['y0'])
            y2 = int(col_attrib['y1'])
            cc += 1
            data_image[y1:y2 + 1, last_x:x1 + 1, 1] = cc
            last_x = x1 + 1
        data_image[ty1:ty2, last_x:tx2, 1] = cc

        # Each Cell not flagged ``dontCare`` gets its own id on plane 2.
        ll = 0
        for cell_xml in table.findall('Cell'):
            bounding_box = cell_xml.attrib
            if bounding_box['dontCare'] == 'true':
                continue
            x1 = int(bounding_box['x0'])
            y1 = int(bounding_box['y0'])
            x2 = int(bounding_box['x1'])
            y2 = int(bounding_box['y1'])
            ll += 1
            data_image[y1:y2 + 1, x1:x2 + 1, 2] = ll
        show_1 = ((data_image[:, :] * 100) % 256).astype(np.uint8)

        def center(rect):
            """Return the integer (x, y) midpoint of a token rectangle."""
            return (int(rect['x'] + rect['width'] / 2),
                    int(rect['y'] + rect['height'] / 2))

        # Keep only the tokens whose midpoint carries a non-zero row label.
        all_tokens = []
        all_tokens_rects = []
        centers = []
        for i, token in enumerate(self.all_tokens):
            token_rect = self.all_tokens_rects[i]
            mid_x, mid_y = center(token_rect)
            if data_image[mid_y, mid_x, 0] == 0:
                continue
            all_tokens.append(token)
            all_tokens_rects.append(token_rect)
            centers.append((mid_x, mid_y))

        N = len(all_tokens)

        if N == 0:
            return  # If there are no words in the table, its useless anyway

        graph_builder = NeighborGraphBuilder(all_tokens_rects,
                                             data_image[:, :, 0])
        M, D = graph_builder.get_neighbor_matrix()

        # Cell id under each kept token's midpoint.
        centers = np.array(centers, dtype=np.intp)
        cell_ids = data_image[centers[:, 1], centers[:, 0], 2]

        # Flag, per neighbor-graph column, whether the neighbor shares the
        # token's cell id; entries stay 0 where the graph holds -1.
        neighbors_same_cell = np.zeros((N, 4))
        for i in range(N):
            for direction in range(4):
                neighbor = int(M[i, direction])
                if neighbor != -1 and cell_ids[i] == cell_ids[neighbor]:
                    neighbors_same_cell[i, direction] = 1

        sorted_path_full = self.sorted_path + "-%d" % increment
        # NOTE(review): the directory is only created when the module-level
        # ``dont_output`` flag is false, yet the files below are written
        # unconditionally -- confirm this is intended.
        if not dont_output:
            if not os.path.exists(sorted_path_full):
                os.mkdir(sorted_path_full)

        cv2.imwrite(os.path.join(sorted_path_full, 'visual.png'), show_1)

        grid = 256
        # To place input vectors at respective spatial coordinates
        input_tensor = np.zeros((grid, grid, 308), dtype=np.float64)
        # Same cell as the neighbor or not, 0 for not, 1 for yes
        output_tensor = np.zeros((grid, grid, 4), dtype=np.float64)
        # Whether there was a word here or not
        output_tensor_word_mask = np.zeros((grid, grid), dtype=np.float64)

        output_tensor_zone_mask = np.ones((grid, grid), dtype=np.float32)

        table_width = tx2 - tx1
        table_height = ty2 - ty1
        rgb = np.zeros((grid, grid, 3))
        glove_not_found = 0.0
        for i in range(N):
            token_rect = all_tokens_rects[i]

            # Source coordinates of top left of tokens
            cx = token_rect['x'] - tx1
            cy = token_rect['y'] - ty1
            cw = token_rect['width']
            ch = token_rect['height']

            distances_vector = D[i]

            # Get the GloVe reading
            embedding = self.glove_reader.get_vector(all_tokens[i])
            if embedding is None:
                embedding = np.ones((300)) * (-1)
                glove_not_found += 1

            positional = np.array([
                cx / table_width, cy / table_height, cw / table_width,
                ch / table_height, distances_vector[0] / table_width,
                distances_vector[1] / table_height,
                distances_vector[2] / table_width,
                distances_vector[3] / table_height
            ])

            # Destination coordinates on the grid. Tokens are selected by
            # midpoint, so a top-left corner can fall above/left of the
            # table box; clamp so a negative index cannot silently wrap to
            # the opposite edge of the tensor.
            nx = int(math.floor(float(grid) * cx / table_width))
            ny = int(math.floor(float(grid) * cy / table_height))
            nx = min(max(nx, 0), grid - 1)
            ny = min(max(ny, 0), grid - 1)
            input_tensor[ny, nx] = np.concatenate((embedding, positional))

            # From the neighbor graph
            output_tensor[ny, nx] = neighbors_same_cell[i]

            if neighbors_same_cell[i, 0] == 1 or neighbors_same_cell[i,
                                                                     1] == 1:
                rgb[ny, nx] = np.array([0, 0, 255])
            else:
                rgb[ny, nx] = np.array([255, 255, 255])

            output_tensor_word_mask[ny, nx] = 1

        if glove_not_found / N > 0.3:
            print("WARNING: GloVe not found ratio", glove_not_found / N)

        # Output debugging visual file for zone mask
        segmentation_visualize_path = os.path.join(sorted_path_full,
                                                   'visual_segment.png')
        cv2.imwrite(segmentation_visualize_path,
                    (output_tensor_zone_mask * 255).astype(np.uint8))

        # Output debugging visual images for the word positions, once as-is
        # and once multiplied by the zone mask.
        word_mask_path = os.path.join(sorted_path_full, 'visual_word_mask.png')
        output_tensor_word_mask_temp = (rgb.transpose(
            (2, 0, 1)) * output_tensor_zone_mask).transpose(1, 2, 0)
        cv2.imwrite(word_mask_path, rgb.astype(np.uint8))
        word_mask_path_1 = os.path.join(sorted_path_full,
                                        'visual_word_mask_masked.png')
        cv2.imwrite(word_mask_path_1,
                    output_tensor_word_mask_temp.astype(np.uint8))

        cv2.imwrite(os.path.join(sorted_path_full, 'table_cropped.png'),
                    image_table_cropped)

        # Dump the content to pickle file. The file is compressed by gzip.
        dump_path = os.path.join(sorted_path_full, '__dump__.pklz')
        document = TableParseDocument(input_tensor, output_tensor,
                                      output_tensor_word_mask,
                                      output_tensor_zone_mask)
        # The context manager closes the archive even if pickling fails.
        with gzip.open(dump_path, 'wb') as f:
            pickle.dump(document, f)
Esempio n. 6
0
    def execute_tokens(self):
        """Rasterize the document's tokens onto a 256x256 grid and dump it.

        For every token the four spatial neighbors (left, top, right,
        bottom) are looked up through ``NeighborGraphBuilder`` and each
        direction is flagged as "same zone" (1) or "different zone" (110) by
        comparing ``self.zone_segmentation`` at the two token centers.  A
        missing neighbor (index -1) is flagged as 1.

        Reads ``self.all_tokens``, ``self.all_tokens_rects``, ``self.image``,
        ``self.zone_segmentation``, ``self.glove_reader``, ``self.width``,
        ``self.height`` and ``self.sorted_path``.

        Side effects:
            * ``self.zone_segmentation`` is binarized in place (every
              non-zero entry becomes 1).
            * Writes ``visual_segment.png``, ``visual_word_mask.png``,
              ``visual_word_mask_masked.png`` and the gzip-compressed pickle
              ``__dump__.pklz`` into ``self.sorted_path``.
            * Prints ``self.sorted_path`` and the masked image shape.
        """
        # Marker stored for a neighbor that lies in a different zone.
        different_zone = 110

        # Local neighbors of each token, in column order:
        # 0 = left, 1 = top, 2 = right, 3 = bottom
        graph_builder = NeighborGraphBuilder(self.all_tokens_rects,
                                             self.image[:, :, 0])
        # M is the indices graph and D is distance matrix
        M, D = graph_builder.get_neighbor_matrix()

        N = len(self.all_tokens)

        def zone_at_center(rect):
            # Zone label under the center pixel of a token rectangle.
            return self.zone_segmentation[
                int(rect['y'] + rect['height'] / 2),
                int(rect['x'] + rect['width'] / 2)]

        # Every direction defaults to 1 (same zone / no neighbor); only a
        # neighbor in a different zone overrides it.  Each direction writes
        # to its own column.
        neighbors_same_zone = np.ones((N, 4))
        for i in range(N):
            token_rect = self.all_tokens_rects[i]
            for direction in range(4):
                neighbor_index = int(M[i, direction])
                if neighbor_index == -1:
                    continue
                neighbor_rect = self.all_tokens_rects[neighbor_index]
                if zone_at_center(token_rect) != zone_at_center(
                        neighbor_rect):
                    neighbors_same_zone[i, direction] = different_zone

        # To place input vectors at respective spatial coordinates:
        # 300 embedding values followed by 8 positional values
        input_tensor = np.zeros((256, 256, 308), dtype=np.float64)
        # Per direction: 1 for same zone, 110 for different zone
        output_tensor = np.zeros((256, 256, 4), dtype=np.float64)
        # 0 where no token was placed, otherwise 1, or 110 when at least one
        # neighbor of the token lies in a different zone
        output_tensor_word_mask = np.zeros((256, 256), dtype=np.float64)

        # Whether there was a zone here or not. This must happen after the
        # zone comparisons above, which need the original zone labels.
        self.zone_segmentation[self.zone_segmentation != 0] = 1

        output_tensor_zone_mask = cv2.resize(self.zone_segmentation,
                                             (256, 256))

        for i in range(N):
            token_rect = self.all_tokens_rects[i]
            # Source coordinates of top left of tokens
            cx = token_rect['x']
            cy = token_rect['y']
            cw = token_rect['width']
            ch = token_rect['height']

            distances_vector = D[i]

            # Get the GloVe reading; unknown tokens get a vector of -1
            embedding = self.glove_reader.get_vector(self.all_tokens[i])
            if embedding is None:
                embedding = np.full(300, -1.0)

            # Horizontal quantities are normalized by the width and vertical
            # quantities by the height.
            positional = np.array([
                cx / self.width, cy / self.height, cw / self.width,
                ch / self.height, distances_vector[0] / self.width,
                distances_vector[1] / self.height,
                distances_vector[2] / self.width,
                distances_vector[3] / self.height
            ])

            # Destination coordinates on 256x256 scale and place there
            nx = math.floor(256.0 * cx / self.width)
            ny = math.floor(256.0 * cy / self.height)
            input_tensor[ny, nx] = np.concatenate((embedding, positional))

            # From the neighbor graph
            output_tensor[ny, nx] = neighbors_same_zone[i]

            if (neighbors_same_zone[i] == different_zone).any():
                output_tensor_word_mask[ny, nx] = different_zone
            else:
                output_tensor_word_mask[ny, nx] = 1

        print(self.sorted_path)

        # Debug image: (255, 255, 255) for tokens whose neighbors all share
        # their zone, (255, 0, 0) for tokens with a different-zone neighbor.
        rgb = np.zeros((256, 256, 3))
        rgb[output_tensor_word_mask == 1.0] = (255, 255, 255)
        rgb[output_tensor_word_mask == float(different_zone)] = (255, 0, 0)

        # Output debugging visual file for zone mask
        segmentation_visualize_path = os.path.join(self.sorted_path,
                                                   'visual_segment.png')
        cv2.imwrite(segmentation_visualize_path,
                    (output_tensor_zone_mask * 255).astype(np.uint8))

        # Output debugging visual image for word mask, plain and multiplied
        # channel-wise by the zone mask
        word_mask_path = os.path.join(self.sorted_path, 'visual_word_mask.png')
        output_tensor_word_mask_temp = (rgb.transpose(
            (2, 0, 1)) * output_tensor_zone_mask).transpose(1, 2, 0)
        print(output_tensor_word_mask_temp.shape)

        matplotlib.image.imsave(word_mask_path, rgb.astype(np.uint8))
        word_mask_path_1 = os.path.join(self.sorted_path,
                                        'visual_word_mask_masked.png')
        matplotlib.image.imsave(word_mask_path_1,
                                output_tensor_word_mask_temp.astype(np.uint8))

        # Dump the content to pickle file. The file is compressed by gzip.
        dump_path = os.path.join(self.sorted_path, '__dump__.pklz')
        document = ZoneSegmentDocument(input_tensor, output_tensor,
                                       output_tensor_word_mask,
                                       output_tensor_zone_mask)
        # The context manager closes the file even if pickling fails.
        with gzip.open(dump_path, 'wb') as f:
            pickle.dump(document, f)