Ejemplo n.º 1
0
def scale(params):
    '''
    Rescale math boxes from the 512-pixel network input to page-image pixels.

    params is a 4-tuple (args, math_regions, pdf_name, page_num):
      - args.home_images: root directory holding one sub-directory per PDF
      - math_regions: iterable of boxes; indices 0 and 2 are scaled by the
        page width, indices 1 and 3 by the page height
      - pdf_name: sub-directory name of the PDF
      - page_num: page index; the image file is "<page_num + 1>.png"

    Returns the list of rescaled boxes whose feature_extractor width and
    height are both positive. This is a best-effort worker: on any error the
    failure is printed and an empty list is returned.
    '''
    # Bind these before the try block so the error handler can always report
    # them, even when `params` is malformed and the unpacking itself fails.
    pdf_name = page_num = None

    try:
        args, math_regions, pdf_name, page_num = params
        print('Processing ', pdf_name, ' > ', page_num)

        image_path = os.path.join(args.home_images, pdf_name,
                                  str(int(page_num + 1)) + ".png")
        image = cv2.imread(image_path)

        # cv2.imread signals an unreadable/missing file by returning None
        # rather than raising; fail with a message that names the file.
        if image is None:
            raise FileNotFoundError('could not read page image: ' + image_path)

        height = image.shape[0]
        width = image.shape[1]

        input_size = 512  # 512 is ip image size

        new_math = []

        for math in math_regions:
            box = [
                math[0] * width / input_size,
                math[1] * height / input_size,
                math[2] * width / input_size,
                math[3] * height / input_size,
            ]

            if feature_extractor.width(box) > 0 and feature_extractor.height(
                    box) > 0:
                new_math.append(box)

        return new_math
    except Exception:
        print('Error while processing ', pdf_name, ' > ', page_num,
              sys.exc_info())
        return []
Ejemplo n.º 2
0
def adjust_char(params):
    '''
    Adjust the character bounding boxes

    params is a 4-tuple (args, char_regions, pdf_name, page_num):
      - args.home_images: root directory holding one sub-directory per PDF
      - char_regions: iterable of mutable character rows; fields 2..5 are
        read as the bounding box (converted through float to int)
      - pdf_name: sub-directory name of the PDF
      - page_num: page index; the image file is "<page_num + 1>.png"

    Each row whose adjusted box has positive width and height is updated in
    place and collected. Returns the list of those rows. This is a
    best-effort worker: on any error the failure is printed and an empty
    list is returned.
    '''
    # Bind these before the try block so the error handler can always report
    # them, even when `params` is malformed and the unpacking itself fails.
    pdf_name = page_num = None

    try:
        args, char_regions, pdf_name, page_num = params
        print('Char processing ', pdf_name, ' > ', page_num)

        image = cv2.imread(os.path.join(args.home_images, pdf_name, str(int(page_num) + 1) + ".png"))
        im_bw = fit_box.convert_to_binary(image)

        new_chars = []

        for char in char_regions:

            bb_char = [int(float(k)) for k in (char[2], char[3], char[4], char[5])]
            box = fit_box.adjust_box(im_bw, bb_char)
            if feature_extractor.width(box) > 0 and feature_extractor.height(box) > 0:
                # NOTE(review): the box is read from fields 2..5 but written
                # back to fields 1..4. This looks like an off-by-one on one
                # side; confirm against the row layout before changing it.
                char[1] = box[0]
                char[2] = box[1]
                char[3] = box[2]
                char[4] = box[3]
                new_chars.append(char)

        return new_chars
    except Exception:
        print('Error while processing ', pdf_name, ' > ', page_num, sys.exc_info())
        return []
Ejemplo n.º 3
0
    def generate_metadata(self):
        '''
        Append [page_id, x_l, y_l] sliding-window entries to self.metadata.

        For every entry of self.ids (the page id is element 1), the page
        image is covered by square windows of side self.window whose origins
        advance by self.stride window-widths.

          - split 'train' / 'validate': only pages flagged in self.is_math
            are used, and a window is recorded only if at least one ground
            truth box, clipped to the window, still has positive width and
            height.
          - split 'test': every window is recorded.
          - any other split: nothing is recorded.
        '''
        for entry in self.ids:
            page_id = entry[1]

            height, width = self.images[page_id].shape[:2]
            n_horizontal = np.ceil(width / self.window)
            n_vertical = np.ceil(height / self.window)

            # window origins, expressed in units of one window side
            h = np.arange(0, n_horizontal - 1 + self.stride, self.stride)
            v = np.arange(0, n_vertical - 1 + self.stride, self.stride)

            if (self.split == 'train' or self.split == 'validate') and self.is_math[page_id]:
                page_boxes = self.math_ground_truth[page_id]

                for i in h:
                    for j in v:
                        x_l = int(np.round(self.window * i))
                        x_h = x_l + self.window

                        y_l = int(np.round(self.window * j))
                        y_h = y_l + self.window

                        # left, top, right, bottom
                        image_box = [x_l, y_l, x_h, y_h]

                        for box in page_boxes:
                            if not box_utils.intersects(image_box, box):
                                continue

                            # Work on a copy of this one box so the ground
                            # truth is never modified (no need to deep-copy
                            # the whole page for every window).
                            clipped = copy.copy(box)

                            # crop the box to the window, then translate it
                            # so the window's top-left corner is the origin
                            # (y increases downwards)
                            clipped[0] = max(x_l, box[0]) - x_l
                            clipped[1] = max(y_l, box[1]) - y_l
                            clipped[2] = min(x_h, box[2]) - x_l
                            clipped[3] = min(y_h, box[3]) - y_l

                            if feature_extractor.width(clipped) > 0 and feature_extractor.height(clipped) > 0:
                                # one usable box is enough to keep the window
                                self.metadata.append([page_id, x_l, y_l])
                                break
            elif self.split == 'test':
                for i in h:
                    for j in v:
                        x_l = int(np.round(self.window * i))
                        y_l = int(np.round(self.window * j))
                        self.metadata.append([page_id, x_l, y_l])
Ejemplo n.º 4
0
def voting_algo(math_regions,
                char_data,
                image,
                pdf_name,
                page_num,
                output_dir,
                algorithm='equal',
                thresh_votes=20):
    '''
    Merge candidate math regions of one page into final detection boxes.

    For algorithm 'char_algo' or 'clustering' the work is delegated to
    char_algo() / clustering(). Otherwise a vote map is built by
    vote_for_regions(), split into connected components, and each component's
    bounding box is fitted with fit_box.adjust_box().

    math_regions, char_data, image, algorithm and thresh_votes are forwarded
    to the helpers named above. pdf_name, page_num and output_dir are
    accepted but not used by this function.

    Returns a list of [left, top, right, bottom] boxes; boxes whose
    feature_extractor width or height is below 1 are dropped.
    '''
    if algorithm == 'char_algo':
        return char_algo(math_regions, char_data, image, algorithm,
                         thresh_votes)

    if algorithm == 'clustering':
        return clustering(math_regions, char_data, image, algorithm,
                          thresh_votes)

    # vote for the regions
    votes = vote_for_regions(math_regions, image, algorithm, thresh_votes)

    # `projections` is a module-level name that is not defined in this block
    if projections == 1:
        votes[rows_with_at_least_k_black_pixels(image)] = 0

    im_bw = convert_to_binary(image)
    # builtin int: the np.int alias was removed in NumPy 1.24
    structure = np.ones((3, 3), dtype=int)
    labeled, ncomponents = label(votes, structure)

    # found the boxes. Now extract the co-ordinates left,top,right,bottom
    boxes = []

    for component_id in range(1, ncomponents + 1):

        # row (y) and column (x) indices of the pixels in this component
        rows, cols = np.nonzero(labeled == component_id)

        if rows.size < 1:
            continue

        box = [cols.min(), rows.min(), cols.max(), rows.max()]

        # expansion to correctly fit the region
        box = fit_box.adjust_box(im_bw, box)

        # if box has 0 width or height, do not add it in the final detections
        if feature_extractor.width(box) < 1 or feature_extractor.height(
                box) < 1:
            continue

        boxes.append(box)

    return boxes
Ejemplo n.º 5
0
def voting_algo(params):
    '''
    Merge candidate math regions of one page into final detection boxes.

    params is a 4-tuple (args, math_regions, pdf_name, page_num):
      - args.home_images: root directory holding one sub-directory per PDF
      - args.preprocess: when truthy, math_regions first goes through
        preprocess_math_regions()
      - args.postprocess: when truthy, each box is fitted with
        fit_box.adjust_box()
      - page_num: page index; the image file is "<page_num + 1>.png"

    A vote map is built by vote_for_regions() and split into connected
    components; one bounding box is taken per component.

    Returns a list of [left, top, right, bottom] boxes; boxes whose
    feature_extractor width or height is below 1 are dropped.
    '''
    args, math_regions, pdf_name, page_num = params
    print('Processing ', pdf_name, ' > ', page_num)

    image = cv2.imread(
        os.path.join(args.home_images, pdf_name,
                     str(int(page_num + 1)) + ".png"))

    if args.preprocess:
        math_regions = preprocess_math_regions(math_regions, image)

    # vote for the regions
    votes = vote_for_regions(args, math_regions, image)

    im_bw = convert_to_binary(image)
    # builtin int: the np.int alias was removed in NumPy 1.24
    structure = np.ones((3, 3), dtype=int)
    labeled, ncomponents = label(votes, structure)

    # found the boxes. Now extract the co-ordinates left,top,right,bottom
    boxes = []

    for component_id in range(1, ncomponents + 1):

        # row (y) and column (x) indices of the pixels in this component
        rows, cols = np.nonzero(labeled == component_id)

        if rows.size < 1:
            continue

        box = [cols.min(), rows.min(), cols.max(), rows.max()]

        if args.postprocess:
            # expansion to correctly fit the region
            box = fit_box.adjust_box(im_bw, box)

        # if box has 0 width or height, do not add it in the final detections
        if feature_extractor.width(box) < 1 or feature_extractor.height(
                box) < 1:
            continue

        boxes.append(box)

    return boxes
Ejemplo n.º 6
0
    def Voting(self, image, math_regions):
        '''
        Merge overlapping candidate boxes into final detections by voting.

        Every box in math_regions adds one vote to each pixel it covers
        (box[1]:box[3] rows, box[0]:box[2] columns, truncated to int). Pixels
        with at least 30 votes are kept, grouped into connected components,
        and each component's bounding box is fitted with
        fit_box.adjust_box().

        Returns a list of [left, top, right, bottom] boxes; boxes whose
        feature_extractor width or height is below 1 are dropped.
        '''
        # assumes image is 4-D with height on axis 2 and width on axis 3
        # (e.g. a batched N,C,H,W tensor) -- TODO confirm against caller
        original_width = image.shape[3]
        original_height = image.shape[2]
        thresh_votes = 30

        votes = np.zeros(shape=(original_height, original_width))

        for box in math_regions:
            votes[int(box[1]):int(box[3]), int(box[0]):int(box[2])] += 1

        # binarise: 1 where the vote count reaches the threshold, else 0
        votes = np.where(votes >= thresh_votes, 1.0, 0.0)

        # NOTE(review): convert_to_binary receives the same 4-D `image`;
        # confirm the helper accepts that layout.
        im_bw = convert_to_binary(image)

        # builtin int: the np.int alias was removed in NumPy 1.24
        structure = np.ones((3, 3), dtype=int)
        labeled, ncomponents = label(votes, structure)

        boxes = []

        for component_id in range(1, ncomponents + 1):

            # row (y) and column (x) indices of the pixels in this component
            rows, cols = np.nonzero(labeled == component_id)

            if rows.size < 1:
                continue

            box = [cols.min(), rows.min(), cols.max(), rows.max()]

            # expansion to correctly fit the region
            box = fit_box.adjust_box(im_bw, box)

            # if box has 0 width or height, do not add it in the final detections
            if feature_extractor.width(box) < 1 or feature_extractor.height(
                    box) < 1:
                continue

            boxes.append(box)
        return boxes
Ejemplo n.º 7
0
def normalize(params):
    '''
    Express math boxes as fractions of the page-image size.

    params is a 4-tuple (args, math_regions, pdf_name, page_num). The page
    image "<args.home_images>/<pdf_name>/<page_num + 1>.png" is loaded and
    binarised; box fields 0 and 2 are divided by the binary image's
    shape[1], fields 1 and 3 by its shape[0].

    Returns the normalised boxes whose feature_extractor width and height
    are both positive.
    '''
    args, math_regions, pdf_name, page_num = params
    print('Processing ', pdf_name, ' > ', page_num)

    page_path = os.path.join(args.home_images, pdf_name, str(int(page_num + 1)) + ".png")
    im_bw = fit_box.convert_to_binary(cv2.imread(page_path))

    # (box field, image axis it is divided by)
    field_axes = ((0, 1), (1, 0), (2, 1), (3, 0))

    normalized = []
    for region in math_regions:
        box = [region[field] / im_bw.shape[axis] for field, axis in field_axes]

        if not (feature_extractor.width(box) > 0 and feature_extractor.height(box) > 0):
            continue
        normalized.append(box)

    return normalized
Ejemplo n.º 8
0
    def gen_targets(self, index):
        '''
        Return the ground-truth boxes that fall inside one sliding window.

        self.metadata[index] is read as [page_id, x_l, y_l]; the window is
        the square of side self.window starting at (x_l, y_l). Every box of
        a deep copy of self.math_ground_truth[page_id] that intersects the
        window is clipped to it and shifted so the window's top-left corner
        is the origin. Boxes left with non-positive width or height are
        dropped.

        Returns the list of clipped boxes, or [[-1, -1, -1, -1]] when there
        are none.
        '''
        entry = self.metadata[index]

        x_l, y_l = entry[1], entry[2]
        x_h, y_h = x_l + self.window, y_l + self.window

        # left, top, right, bottom (y increases downwards)
        window_box = [x_l, y_l, x_h, y_h]

        # deep copy: the boxes are edited in place below
        page_boxes = copy.deepcopy(self.math_ground_truth[entry[0]])

        targets = []
        for box in page_boxes:
            # only math that touches the window is of interest
            if not box_utils.intersects(window_box, box):
                continue

            # crop the box so it fits into the window
            box[0], box[1] = max(x_l, box[0]), max(y_l, box[1])
            box[2], box[3] = min(x_h, box[2]), min(y_h, box[3])

            # translate to the window's origin
            for k, offset in enumerate((x_l, y_l, x_l, y_l)):
                box[k] = box[k] - offset

            if feature_extractor.width(box) > 0 and feature_extractor.height(box) > 0:
                targets.append(box)

        # Placeholder used for testing, where targets do not matter; it
        # avoids "IndexError: too many indices for array" downstream.
        # TODO: refactor in future
        if not targets:
            return [[-1, -1, -1, -1]]

        return targets
Ejemplo n.º 9
0
def adjust(params):
    '''
    Fit the bounding boxes to the characters

    params is a 4-tuple (args, math_regions, pdf_name, page_num). The page
    image "<args.home_images>/<pdf_name>/<page_num + 1>.png" is loaded and
    binarised, and every region is passed through fit_box.adjust_box().

    Returns the adjusted boxes whose feature_extractor width and height are
    both positive.
    '''
    args, math_regions, pdf_name, page_num = params
    print('Processing ', pdf_name, ' > ', page_num)

    page_path = os.path.join(args.home_images, pdf_name, str(int(page_num + 1)) + ".png")
    im_bw = fit_box.convert_to_binary(cv2.imread(page_path))

    # lazy generator: each region is adjusted right before it is checked
    fitted = (fit_box.adjust_box(im_bw, region) for region in math_regions)

    return [
        box for box in fitted
        if feature_extractor.width(box) > 0 and feature_extractor.height(box) > 0
    ]