def remove_hlines(binary, scale, maxsize=10):
    """
    Strips overly wide labelled objects (horizontal rules) from an image.

    The image is labelled with ``morph.label`` and one bounding slice per
    label is fetched with ``morph.find_objects``. Every object whose slice
    is wider, according to ``sl.width``, than ``maxsize * scale`` is erased;
    all remaining objects are kept.

    Args:
        binary (numpy.array): image to filter; presumably a binarized page
                              with non-zero foreground (TODO confirm).
        scale (float): factor multiplied with ``maxsize`` to obtain the
                       width threshold.
        maxsize (int): widest object, in multiples of ``scale``, that is
                       still retained.

    Returns:
        numpy.array of type code 'B' holding 1 wherever a label survived
        the filtering and 0 elsewhere.
    """
    labels, _ = morph.label(binary)
    # The slice at position k is matched against label value k + 1, so the
    # enumeration counts from 1.
    for label_id, bounds in enumerate(morph.find_objects(labels), start=1):
        if sl.width(bounds) > maxsize * scale:
            # Slicing yields a view, so clearing it edits ``labels`` itself.
            # Only pixels carrying this label are reset; other objects that
            # overlap the same bounding slice are left alone.
            region = labels[bounds]
            region[region == label_id] = 0
    return np.array(labels != 0, dtype='B')
def remove_hlines(binary, scale, maxsize=10):
    """
    Filters out labelled objects that are too wide to be text.

    Wide objects are treated as horizontal black lines that would only
    disturb page segmentation. An object counts as too wide when the width
    of its bounding slice (``sl.width``) exceeds ``maxsize * scale``.

    Args:
        binary (numpy.array): input image handed to ``morph.label``.
        scale (float): multiplier for ``maxsize``; presumably the estimated
                       glyph scale of the page (verify against caller).
        maxsize (int): width limit expressed in units of ``scale``; objects
                       wider than the product are removed.

    Returns:
        numpy.array with type code 'B': 1 for pixels that still belong to
        some label after filtering, 0 for everything else.
    """
    labels, _ = morph.label(binary)
    boxes = morph.find_objects(labels)
    # Label values are one-based relative to the position in ``boxes``.
    for label_value, box in enumerate(boxes, 1):
        too_wide = sl.width(box) > maxsize * scale
        if too_wide:
            window = labels[box]  # view into ``labels``, not a copy
            # Zero just this label; neighbours inside the box are untouched.
            window[window == label_value] = 0
    remaining = labels != 0
    return np.array(remaining, 'B')
def remove_hlines(binary: np.ndarray, scale: float, maxsize: int = 10) -> np.ndarray:
    """
    Erases horizontal black lines that would disturb page segmentation.

    A debug message is logged, then the image is labelled through
    ``morph.label``. Each label whose bounding slice from
    ``morph.find_objects`` is wider (``sl.width``) than ``maxsize * scale``
    is reset to background.

    Args:
        binary: image passed to ``morph.label``; presumably a binarized
                page (TODO confirm).
        scale: factor applied to ``maxsize`` to form the width threshold.
        maxsize: largest width, in multiples of ``scale``, an object may
                 have without being removed.

    Returns:
        numpy.ndarray of type code 'B' that is 1 where a label is left
        after filtering and 0 elsewhere.
    """
    logger.debug('Filtering horizontal lines')
    labels, _ = morph.label(binary)
    for position, extent in enumerate(morph.find_objects(labels)):
        # Keep anything that does not exceed the width threshold.
        if not (sl.width(extent) > maxsize * scale):
            continue
        # Slices are stored zero-based while label values start at one.
        current = position + 1
        patch = labels[extent]  # a view, so the write lands in ``labels``
        patch[patch == current] = 0
    return np.array(labels != 0, dtype='B')