def overlap_expand(math_regions):
    """Merge intersecting regions into their common bounding box (one pass).

    Each region is indexable with at least four coordinates; indices 0 and 1
    are combined with ``min`` and indices 2 and 3 with ``max`` (presumably
    ``[left, top, right, bottom]`` -- confirm against ``box_utils``).

    For every pair ``(i, j)`` with ``i < j`` that ``box_utils.intersects``
    reports as overlapping, region ``i`` is grown in place to cover region
    ``j`` and ``j`` is dropped from the result. This is a single sweep, not a
    fixed-point iteration.

    Args:
        math_regions: a list of regions, or any object exposing ``tolist()``
            (e.g. a NumPy array) that converts to such a list.

    Returns:
        A new list holding the surviving (possibly enlarged) regions in their
        original order. When a list is passed in, its inner regions are
        modified in place.
    """
    print('Number of math regions ', len(math_regions))

    # isinstance also accepts list subclasses, which have no ``tolist``.
    if not isinstance(math_regions, list):
        math_regions = math_regions.tolist()

    # A set keeps the final membership filter O(1) per region; the same index
    # can be reported many times.
    obsolete = set()
    count = len(math_regions)

    for i, current in enumerate(math_regions):
        for j in range(i + 1, count):
            other = math_regions[j]
            if box_utils.intersects(current, other):
                # Grow region i so that it covers region j as well.
                current[0] = min(current[0], other[0])
                current[1] = min(current[1], other[1])
                current[2] = max(current[2], other[2])
                current[3] = max(current[3], other[3])
                obsolete.add(j)

    return [
        region
        for index, region in enumerate(math_regions)
        if index not in obsolete
    ]
def generate_metadata(self):
    """Append one ``[page, x_l, y_l]`` entry to ``self.metadata`` per window.

    For every entry in ``self.ids`` (element ``[1]`` is used as the key into
    ``self.images``, ``self.math_ground_truth`` and ``self.is_math``) the page
    is tiled with square windows of side ``self.window``. Window origins are
    ``round(self.window * k)`` for ``k`` stepping by ``self.stride`` from 0,
    so ``stride`` is expressed as a fraction of the window size.

    * ``split`` is ``'train'`` or ``'validate'`` and the page is flagged in
      ``self.is_math``: a window is recorded only if at least one ground-truth
      box, clipped to the window, still has positive width and height.
    * ``split`` is ``'test'``: every window is recorded.
    * Anything else: nothing is recorded for that page.

    Returns:
        None. ``self.metadata`` is extended in place.
    """
    window = self.window

    def window_has_math(page_boxes, x_l, y_l):
        # True when some box overlaps the window with a non-empty area.
        x_h = x_l + window
        y_h = y_l + window
        image_box = [x_l, y_l, x_h, y_h]  # left, top, right, bottom

        for box in page_boxes:
            if not box_utils.intersects(image_box, box):
                continue

            # Only the intersecting box needs a scratch copy; the ground
            # truth itself must stay untouched.
            clipped = copy.deepcopy(box)

            # Crop the box to the window (y increases downwards) ...
            clipped[0] = max(x_l, clipped[0])
            clipped[1] = max(y_l, clipped[1])
            clipped[2] = min(x_h, clipped[2])
            clipped[3] = min(y_h, clipped[3])

            # ... then translate it so the window origin is (0, 0).
            clipped[0] = clipped[0] - x_l
            clipped[2] = clipped[2] - x_l
            clipped[1] = clipped[1] - y_l
            clipped[3] = clipped[3] - y_l

            if (feature_extractor.width(clipped) > 0
                    and feature_extractor.height(clipped) > 0):
                return True
        return False

    for page_id in self.ids:
        page = page_id[1]

        # Only the first two dimensions are needed, so 2-D images work too.
        height, width = self.images[page].shape[:2]
        page_boxes = self.math_ground_truth[page]

        n_horizontal = np.ceil(width / window)
        n_vertical = np.ceil(height / window)

        h = np.arange(0, n_horizontal - 1 + self.stride, self.stride)
        v = np.arange(0, n_vertical - 1 + self.stride, self.stride)

        x_origins = [int(np.round(window * i)) for i in h]
        y_origins = [int(np.round(window * j)) for j in v]

        if self.split in ('train', 'validate') and self.is_math[page]:
            for x_l in x_origins:
                for y_l in y_origins:
                    if window_has_math(page_boxes, x_l, y_l):
                        self.metadata.append([page, x_l, y_l])
        elif self.split == 'test':
            for x_l in x_origins:
                for y_l in y_origins:
                    self.metadata.append([page, x_l, y_l])
def gen_targets(self, index):
    """Return the ground-truth boxes visible in the window at ``index``.

    ``self.metadata[index]`` supplies the page key (element 0) and the
    window's left/top origin (elements 1 and 2); the window is a square of
    side ``self.window``. Each ground-truth box that intersects the window is
    clipped to it and shifted into window-local coordinates, and is kept only
    if its clipped width and height are both positive.

    Returns:
        A list of clipped boxes (left, top, right, bottom; y grows
        downwards), or ``[[-1, -1, -1, -1]]`` when no box qualifies.
    """
    page_key, x_l, y_l = self.metadata[index][0:3]
    x_h = x_l + self.window
    y_h = y_l + self.window
    image_box = [x_l, y_l, x_h, y_h]

    # Work on a private copy so the stored ground truth is never modified.
    page_boxes = copy.deepcopy(self.math_ground_truth[page_key])

    targets = []
    for region in page_boxes:
        # Boxes entirely outside the window contribute nothing.
        if not box_utils.intersects(image_box, region):
            continue

        # Per axis: crop to the window, then move the window origin to 0.
        # Index `axis` is the low edge and `axis + 2` the matching high edge.
        for axis, low, high in ((0, x_l, x_h), (1, y_l, y_h)):
            region[axis] = max(low, region[axis])
            region[axis + 2] = min(high, region[axis + 2])
            region[axis] = region[axis] - low
            region[axis + 2] = region[axis + 2] - low

        has_width = feature_extractor.width(region) > 0
        if has_width and feature_extractor.height(region) > 0:
            targets.append(region)

    # Placeholder used for testing, where targets are irrelevant; it avoids
    # "IndexError: too many indices for array" downstream.
    # TODO: refactor in future
    if len(targets) == 0:
        return [[-1, -1, -1, -1]]
    return targets