Exemple #1
0
 def get_candidates_and_features_page_num(self, page_num):
     """Gather candidate boxes and their feature matrix for one page.

     Candidates come from two sources, ``self.get_candidates_lines`` and
     ``self.get_candidates_alignments``. Boxes whose extent is not
     strictly positive on both axes are dropped. The line-based boxes are
     placed first in the result, followed by the alignment-based ones.

     :param page_num: key into ``self.elems`` and ``self.font_stats``.
     :return: ``(boxes, features)``; ``features`` is the column-wise
         concatenation of the alignment features and the line features.
         ``([], [])`` is returned when no box survives the filter.

     NOTE(review): the alignment feature rows are accumulated
     alignment-candidates first and for the *unfiltered* line candidates,
     whereas ``boxes`` holds filtered line candidates first. Presumably
     the rows are expected to match ``boxes`` one-to-one -- confirm
     against ``get_alignment_features`` / ``get_lines_features``.
     """

     def _has_area(bbox):
         # Indices 3..6 carry the box extents; require a positive size on
         # both axes.
         return bbox[5] - bbox[3] > 0 and bbox[6] - bbox[4] > 0

     page_elems = self.elems[page_num]
     page_font_stat = self.font_stats[page_num]

     line_candidates = self.get_candidates_lines(page_num, page_elems)
     boxes = [bbox for bbox in line_candidates if _has_area(bbox)]

     align_candidates, row_features = self.get_candidates_alignments(
         page_num, page_elems)
     # The counts reported here are taken before any filtering.
     self.log.info(
         "Page Num: {}, Line bboxes: {}, Alignment bboxes: {}".format(
             page_num, len(line_candidates), len(align_candidates)))

     # In-place extension on purpose: keeps the object returned by
     # get_candidates_alignments as the accumulator.
     row_features += get_alignment_features(line_candidates, page_elems,
                                            page_font_stat)
     boxes.extend(bbox for bbox in align_candidates if _has_area(bbox))

     if not boxes:
         return [], []

     per_box_features = get_lines_features(boxes, page_elems)
     left_block = np.array(row_features)
     right_block = np.array(per_box_features)
     return boxes, np.concatenate((left_block, right_block), axis=1)
Exemple #2
0
    def get_candidates_and_features_page_num(self, page_num):
        """Return the alignment-based candidate boxes of a page with features.

        Only ``self.get_candidates_alignments`` is used as a candidate
        source here. Its features are joined column-wise with the output of
        ``get_lines_features`` for the same boxes.

        :param page_num: key into ``self.elems``.
        :return: ``(boxes, features)``, or ``([], [])`` (after logging an
            info message) when no candidate box was produced.
        """
        page_elems = self.elems[page_num]
        boxes, align_feats = self.get_candidates_alignments(
            page_num, page_elems)

        if len(boxes) > 0:
            line_feats = get_lines_features(boxes, page_elems)
            left_block = np.array(align_feats)
            right_block = np.array(line_feats)
            return boxes, np.concatenate((left_block, right_block), axis=1)

        # Nothing to featurize on this page.
        self.log.info("No boxes were found on page {}.".format(page_num))
        return [], []