Ejemplo n.º 1
0
def remove_hlines(binary, scale, maxsize=10):
    """
    Drops labelled regions that are too wide, with the aim of removing
    horizontal black lines that only interfere with page segmentation.

    The image is labelled with ``morph.label`` and every region whose
    bounding-slice width (as reported by ``sl.width``) is strictly greater
    than ``maxsize * scale`` is erased.

        Args:
            binary (numpy.array): image handed to ``morph.label``
                (presumably a binarized page -- confirm against callers).
            scale (float): factor multiplied with ``maxsize`` to obtain the
                width threshold.
            maxsize (int): regions wider than ``maxsize * scale`` are
                removed; narrower or equally wide regions are kept.

        Returns:
            numpy.array of dtype 'B' (unsigned byte) that is 1 wherever a
            label survived the filtering and 0 elsewhere.

    """
    labelled, _ = morph.label(binary)
    bounding_slices = morph.find_objects(labelled)
    # Label ids start at 1, while the list of bounding slices is 0-based.
    for label_id, bbox in enumerate(bounding_slices, start=1):
        if not sl.width(bbox) > maxsize * scale:
            continue
        # Slicing with the bounding box yields a view, so zeroing through it
        # erases this region's pixels in ``labelled`` itself; the equality
        # mask leaves other labels sharing the bounding box untouched.
        region = labelled[bbox]
        region[region == label_id] = 0
    return np.array(labelled != 0, 'B')
Ejemplo n.º 2
0
def remove_hlines(binary, scale, maxsize=10):
    """
    Filters overly wide labelled regions out of an image, intended to get rid
    of horizontal black lines that only interfere with page segmentation.

        Args:
            binary (numpy.array): image to be labelled by ``morph.label``
                (presumably a binarized page -- confirm against callers).
            scale (float): multiplier applied to ``maxsize`` to form the
                width threshold.
            maxsize (int): a region is removed when its width, as given by
                ``sl.width``, exceeds ``maxsize * scale``.

        Returns:
            numpy.array of dtype 'B' (unsigned byte) holding 1 for every
            pixel that still carries a label after filtering, 0 otherwise.

    """
    components, _ = morph.label(binary)
    boxes = morph.find_objects(components)
    for index, box in enumerate(boxes):
        too_wide = sl.width(box) > maxsize * scale
        if too_wide:
            # ``index`` is 0-based whereas the label values start at 1.
            label_value = index + 1
            # ``patch`` is a view into ``components``: the masked assignment
            # clears only this label inside its bounding box.
            patch = components[box]
            patch[patch == label_value] = 0
    foreground = components != 0
    return np.array(foreground, 'B')
Ejemplo n.º 3
0
def remove_hlines(binary: np.ndarray,
                  scale: float,
                  maxsize: int = 10) -> np.ndarray:
    """
    Erases labelled regions that are too wide, meant to remove horizontal
    black lines that only interfere with page segmentation.

    A debug message is logged, the image is labelled via ``morph.label`` and
    each region whose width (``sl.width`` of its bounding slices) is strictly
    greater than ``maxsize * scale`` is set to background.

        Args:
            binary: image passed to ``morph.label`` (presumably a binarized
                page -- confirm against callers).
            scale: factor multiplied with ``maxsize`` to obtain the width
                threshold.
            maxsize: regions wider than ``maxsize * scale`` are removed.

        Returns:
            numpy.ndarray of dtype 'B' (unsigned byte): 1 where a label
            remains after filtering, 0 elsewhere.

    """
    logger.debug('Filtering horizontal lines')
    label_img, _ = morph.label(binary)
    slices = morph.find_objects(label_img)
    # Bounding slices are listed in label order; label ids begin at 1.
    for label_id, bounds in enumerate(slices, start=1):
        if sl.width(bounds) > maxsize * scale:
            # Work on a view of the bounding box so the assignment writes
            # through to ``label_img``; only pixels of this label are cleared.
            window = label_img[bounds]
            hits = window == label_id
            window[hits] = 0
    return np.array(label_img != 0, 'B')