Exemple #1
0
def overlap_expand(math_regions):
    """Merge intersecting math regions into their common bounding box.

    Each region is indexed as ``[0], [1], [2], [3]``; when two regions
    intersect (as decided by ``box_utils.intersects``), the earlier one
    is grown so that its first two coordinates take the minimum and its
    last two coordinates take the maximum of the pair, and the later one
    is dropped from the result.

    The merge is a single pass over all ``(i, j)`` pairs with ``i < j``;
    it is not repeated until a fixed point is reached.

    Args:
        math_regions: a list of mutable 4-coordinate boxes, or any object
            exposing ``tolist()`` (e.g. a numpy array) that yields one.
            When a list is passed, its surviving boxes are expanded
            in place.

    Returns:
        A new list holding the regions that were not absorbed into an
        earlier region.
    """
    print('Number of math regions ', len(math_regions))

    # isinstance (rather than an exact type comparison) lets list
    # subclasses through; those have no tolist() to fall back on.
    if not isinstance(math_regions, list):
        math_regions = math_regions.tolist()

    # Indices of regions already absorbed by an earlier region. A set keeps
    # the final filtering linear in the number of regions.
    obsolete = set()

    for i, region in enumerate(math_regions):
        for j in range(i + 1, len(math_regions)):
            other = math_regions[j]
            if box_utils.intersects(region, other):
                region[0] = min(region[0], other[0])
                region[1] = min(region[1], other[1])
                region[2] = max(region[2], other[2])
                region[3] = max(region[3], other[3])
                obsolete.add(j)

    return [
        region
        for idx, region in enumerate(math_regions)
        if idx not in obsolete
    ]
Exemple #2
0
    def generate_metadata(self):
        """Fill ``self.metadata`` with the sliding windows to be sampled.

        For every entry of ``self.ids`` (whose element ``[1]`` is used as
        the page key), the page image is tiled with square windows of side
        ``self.window``; consecutive window origins are
        ``self.window * self.stride`` pixels apart. Each selected window is
        recorded as ``[page, x_l, y_l]`` where ``(x_l, y_l)`` is its
        top-left corner.

        * ``'train'`` / ``'validate'`` splits: only pages flagged in
          ``self.is_math`` are considered, and a window is kept only if at
          least one ground-truth box, clipped to the window, has positive
          width and height.
        * ``'test'`` split: every window is kept.
        * any other split: nothing is recorded.
        """
        for page_id in self.ids:
            page = page_id[1]

            # Only the two leading dimensions are needed, so images without
            # a channel axis are accepted as well.
            height, width = self.images[page].shape[:2]

            n_horizontal = np.ceil(width / self.window)
            n_vertical = np.ceil(height / self.window)

            # Window positions expressed in units of one window side.
            h = np.arange(0, n_horizontal - 1 + self.stride, self.stride)
            v = np.arange(0, n_vertical - 1 + self.stride, self.stride)

            # Top-left pixel corner of every window, horizontal index
            # outermost (same visiting order for all splits).
            origins = [
                (int(np.round(self.window * i)),
                 int(np.round(self.window * j)))
                for i in h
                for j in v
            ]

            if self.split in ('train', 'validate') and self.is_math[page]:
                page_boxes = self.math_ground_truth[page]

                for x_l, y_l in origins:
                    x_h = x_l + self.window
                    y_h = y_l + self.window

                    # left, top, right, bottom
                    image_box = [x_l, y_l, x_h, y_h]

                    for box in page_boxes:
                        if not box_utils.intersects(image_box, box):
                            continue

                        # Work on a scratch copy of this single box so the
                        # ground truth is never modified and the whole page
                        # does not have to be deep-copied per window.
                        clipped = copy.copy(box)

                        # Crop the box to the window, then translate it so
                        # the window's top-left corner is the origin
                        # (y increases downwards).
                        clipped[0] = max(x_l, box[0]) - x_l
                        clipped[1] = max(y_l, box[1]) - y_l
                        clipped[2] = min(x_h, box[2]) - x_l
                        clipped[3] = min(y_h, box[3]) - y_l

                        if (feature_extractor.width(clipped) > 0
                                and feature_extractor.height(clipped) > 0):
                            # One usable box is enough to keep the window.
                            self.metadata.append([page, x_l, y_l])
                            break
            elif self.split == 'test':
                for x_l, y_l in origins:
                    self.metadata.append([page, x_l, y_l])
Exemple #3
0
    def gen_targets(self, index):
        """Return the math boxes visible in the window ``self.metadata[index]``.

        The metadata entry is read as ``[page, x_l, y_l]``; the window spans
        ``self.window`` pixels from that top-left corner in both directions.
        Every ground-truth box of the page that intersects the window is
        cropped to the window and shifted into window-local coordinates.
        Boxes whose cropped width or height is not positive are discarded.

        The ground truth itself is left untouched: the boxes returned are
        deep copies.

        Returns:
            A list of boxes in window coordinates, or the placeholder
            ``[[-1, -1, -1, -1]]`` when no box qualifies.
        """
        entry = self.metadata[index]
        left = entry[1]
        top = entry[2]
        right = left + self.window
        bottom = top + self.window

        # left, top, right, bottom
        window_box = [left, top, right, bottom]

        # Work on a private copy because the boxes are edited in place below.
        candidates = copy.deepcopy(self.math_ground_truth[entry[0]])

        kept = []
        for candidate in candidates:
            # Boxes that do not touch the window are ignored entirely.
            if not box_utils.intersects(window_box, candidate):
                continue

            # Crop to the window and move the window's corner to the origin
            # in one step (y increases downwards).
            candidate[0] = max(left, candidate[0]) - left
            candidate[1] = max(top, candidate[1]) - top
            candidate[2] = min(right, candidate[2]) - left
            candidate[3] = min(bottom, candidate[3]) - top

            has_area = (feature_extractor.width(candidate) > 0
                        and feature_extractor.height(candidate) > 0)
            if has_area:
                kept.append(candidate)

        # It is done only for testing, where we do not care about targets.
        # This avoids "IndexError: too many indices for array".
        # TODO: refactor in future
        if not kept:
            kept = [[-1, -1, -1, -1]]

        return kept