Beispiel #1
0
def convert_image_to_array(image: PIL.Image.Image) -> np.ndarray:
    """Return the pixels of ``image`` as a ``(height, width, 3)`` uint8 array.

    Images whose mode is not "RGB" are converted to RGB first.
    """
    rgb_image = image if image.mode == "RGB" else image.convert("RGB")
    width, height = rgb_image.size

    # getdata() yields a flat sequence of pixels, so the 2-D layout has to be
    # restored explicitly (rows first, then columns, then the 3 channels).
    flat_pixels = np.array(rgb_image.getdata())
    return flat_pixels.reshape((height, width, 3)).astype(np.uint8)
def add_shapes(
    background: PIL.Image.Image,
    shape_img: PIL.Image.Image,
    shape_params,
) -> PIL.Image.Image:
    """Paste a shape onto the background and return a crop around it.

    For every entry of ``shape_params`` the shape is pasted onto
    ``background`` (using the shape itself as the paste mask), after which the
    background is replaced by a smoothed crop around the randomly enlarged
    bounding box of the shape.

    Args:
        background: Image the shape is pasted onto; it is modified in place
            by the paste.
        shape_img: Shape image; also used as its own paste mask.
        shape_params: Iterable of per-shape parameter sequences whose last
            two elements are the x and y paste position.

    Returns:
        The resulting image converted to RGB. No bounding boxes are returned.
    """
    for shape_param in shape_params:

        x = shape_param[-2]
        y = shape_param[-1]
        x1, y1, x2, y2 = shape_img.getbbox()
        bg_at_shape = background.crop((x1 + x, y1 + y, x2 + x, y2 + y))
        bg_at_shape.paste(shape_img, (0, 0), shape_img)
        background.paste(bg_at_shape, (x, y))
        # Slightly expand the bounding box in order to simulate variability with
        # the detection boxes. Always make the crop larger than needed because training
        # augmentations will only be able to crop down.
        dx = random.randint(0, int(0.1 * (x2 - x1)))
        dy = random.randint(0, int(0.1 * (y2 - y1)))
        x1 -= dx
        x2 += dx
        y1 -= dy
        y2 += dy

        # NOTE(review): the background is replaced by the crop on every
        # iteration, so later shapes are pasted onto the previous crop --
        # confirm callers only ever pass a single shape.
        background = background.crop((x1 + x, y1 + y, x2 + x, y2 + y))
        background = background.filter(ImageFilter.SMOOTH_MORE)
    return background.convert("RGB")
Beispiel #3
0
def _binarize_image(img: PIL.Image.Image, threshold: float) -> PIL.Image.Image:
    """Return a black-and-white copy of ``img``.

    The image is converted to 8-bit greyscale (mode "L"); every pixel whose
    value is below ``threshold`` becomes 0 and every other pixel becomes 255.

    Args:
        img: Image to binarize; it is not modified.
        threshold: Greyscale cut-off value.

    Returns:
        A new mode "L" image containing only the values 0 and 255.
    """
    # Image.point evaluates the function once per possible pixel value and
    # applies the resulting lookup table, instead of a Python call per pixel.
    return img.convert("L").point(lambda value: 0 if value < threshold else 255)
Beispiel #4
0
    def preprocess_pil_image(self, pil_image: PIL.Image.Image):
        """Prepare a PIL image for the neural network.

        The image is converted to RGB when necessary, resized to
        ``(self.image_size_x, self.image_size_y)``, turned into an array,
        given an extra leading axis and finally passed through
        ``self.preprocess_input``, whose result is returned.
        """
        # Make sure the image is RGB before any further processing.
        rgb_image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")

        # Resize to the input size expected by the network.
        resized = rgb_image.resize((self.image_size_x, self.image_size_y))

        # PIL image -> array, with an extra leading axis added in front.
        batch = np.expand_dims(image.img_to_array(resized), axis=0)

        # Apply the network's own preprocessing function.
        return self.preprocess_input(batch)
Beispiel #5
0
def set_image_mpl_cmap(image: PIL.Image.Image, cmap: str):
    """
    Return an RGB copy of a PIL.Image coloured with a matplotlib colour map

    The colour map is sampled at 256 evenly spaced levels between 0 and 1 and
    the RGB part of those samples is installed as the palette of a copy of
    the image. The input image itself is left untouched.

    See https://matplotlib.org/3.1.1/gallery/color/colormap_reference.html

    Args:
        image: Image to recolour
        cmap: Matplotlib colour map name

    Returns:
        The recoloured copy, converted to RGB
    """
    colour_map = plt.get_cmap(cmap)
    levels = numpy.linspace(0, 1, 2**8)
    # Keep only the first three channels of every sampled colour.
    rgb_palette = to_bytes(colour_map(levels))[:, 0:3]

    recoloured = image.copy()
    recoloured.putpalette(rgb_palette, "RGB")
    return recoloured.convert("RGB")
Beispiel #6
0
def prepare_img_for_numpy(img: PIL.Image.Image) -> PIL.Image.Image:
    """
    Preparing image for transforming into numpy ndarray.

    The image is converted to RGB mode; the input image is not modified.

    Parameters:
        img (PIL.Image.Image) : image to be prepared.
    Returns:
        prepared_img (PIL.Image.Image) : prepared image in RGB mode.
    Raises:
        TypeError : if img is not a PIL.Image.Image.
    """
    if not isinstance(img, PIL.Image.Image):
        raise TypeError(
            "prepare_img_for_numpy: expected image of type PIL.Image.Image, got {0}"
            .format(type(img)))

    return img.convert("RGB")
def apply_blur(img: PIL.Image.Image,
               blur_radius: int = BLUR_blur_radius) -> PIL.Image.Image:
    """
    Returns a Gaussian-blurred RGBA copy of a PIL.Image image.

    Parameters:
        img (PIL.Image) : image to be blurred.
        blur_radius (int) : radius passed to the Gaussian blur filter.
    Returns:
        blurred_image (PIL.Image) : blurred image in RGBA mode.
    Raises:
        TypeError : if img is not a PIL.Image.Image.
    """
    if isinstance(img, PIL.Image.Image):
        rgba_image = img.convert("RGBA")
        return rgba_image.filter(ImageFilter.GaussianBlur(blur_radius))

    raise TypeError(
        "apply_blur: expected img of type PIL.Image, got {0}".format(
            type(img)))
def add_shapes(
    background: PIL.Image.Image,
    shape_imgs: List[PIL.Image.Image],
    shape_params,
    blur_radius: int,
) -> Tuple[List[Tuple[int, float, float, float, float]], PIL.Image.Image]:
    """Paste shapes onto background and return bboxes.

    Args:
        background: Image the shapes are pasted onto; it is modified in place
            by the paste operations.
        shape_imgs: One shape image per entry of ``shape_params``. Each shape
            is slightly blurred and used as its own paste mask.
        shape_params: Per-shape parameter sequences. Element 0 is looked up
            in ``CLASSES``; the last two elements are the x and y paste
            position in pixels.
        blur_radius: Currently unused; every shape is blurred with a fixed
            Gaussian radius of 1.

    Returns:
        A tuple ``(shape_bboxes, image)``. Each bbox is
        ``(class_index, x, y, w, h)`` where the position and the size of the
        shape's bounding box are divided by the background width and height.
        ``image`` is the background converted to RGB.
    """
    shape_bboxes: List[Tuple[int, float, float, float, float]] = []

    for i, shape_param in enumerate(shape_params):

        x = shape_param[-2]
        y = shape_param[-1]
        shape_img = shape_imgs[i]
        # Soften the shape edges before compositing.
        shape_img = shape_img.filter(ImageFilter.GaussianBlur(1))
        x1, y1, x2, y2 = shape_img.getbbox()
        # Composite the shape onto the matching background region, smooth the
        # result and write it back.
        # NOTE(review): the region is cropped at (x1 + x, y1 + y) but pasted
        # back at (x, y); these only coincide when the bbox starts at (0, 0)
        # -- confirm the shape images are tightly cropped.
        bg_at_shape = background.crop((x1 + x, y1 + y, x2 + x, y2 + y))
        bg_at_shape.paste(shape_img, (0, 0), shape_img)
        bg_at_shape = bg_at_shape.filter(ImageFilter.SMOOTH_MORE)
        background.paste(bg_at_shape, (x, y))

        # Express position and size relative to the background dimensions.
        im_w, im_h = background.size
        x /= im_w
        y /= im_h

        w = (x2 - x1) / im_w
        h = (y2 - y1) / im_h

        shape_bboxes.append((CLASSES.index(shape_param[0]), x, y, w, h))
    return shape_bboxes, background.convert("RGB")
def apply_palette_reduction(
    img: PIL.Image.Image,
    reduced_palette_colors_count: int = REDUCED_PALETTE_COLORS_COUNT
) -> PIL.Image.Image:
    """
    Converts a PIL.Image image to palette mode with a reduced colour count.

    Parameters:
        img (PIL.Image) : image to be processed.
        reduced_palette_colors_count (int) : number of colours in the
            adaptive palette of the processed image.
    Returns:
        processed_image (PIL.Image) : mode 'P' image with a reduced palette.
    Raises:
        TypeError : if img is not a PIL.Image.Image.
    """
    if isinstance(img, PIL.Image.Image):
        return img.convert('P',
                           palette=Image.ADAPTIVE,
                           colors=reduced_palette_colors_count)

    raise TypeError(
        "apply_palette_reduction: expected img of type PIL.Image, got {0}".
        format(type(img)))
Beispiel #10
0
def calculate_polygonal_environment(
        im: PIL.Image.Image = None,
        baselines: Sequence[Sequence[Tuple[int, int]]] = None,
        suppl_obj: Sequence[Sequence[Tuple[int, int]]] = None,
        im_feats: np.ndarray = None,
        scale: Tuple[int, int] = None):
    """
    Given a list of baselines and an input image, calculates a polygonal
    environment around each baseline.

    Args:
        im (PIL.Image): grayscale input image (mode 'L')
        baselines (sequence): List of lists containing a single baseline per
                              entry.
        suppl_obj (sequence): List of lists containing additional polylines
                              that should be considered hard boundaries for
                              polygonizaton purposes. Can be used to prevent
                              polygonization into non-text areas such as
                              illustrations or to compute the polygonization of
                              a subset of the lines in an image.
        im_feats (numpy.ndarray): An optional precomputed seamcarve energy map.
                                  Overrides data in `im`. The default map is
                                  `gaussian_filter(sobel(im), 0.5)`.
        scale (tuple): A 2-tuple (h, w) containing optional scale factors of
                       the input. Values of 0 are used for aspect-preserving
                       scaling. `None` (or both values being 0) skips input
                       scaling.
    Returns:
        List of lists of coordinates. If no polygonization could be computed
        for a baseline `None` is returned instead.
    """
    if scale is not None and (scale[0] > 0 or scale[1] > 0):
        w, h = im.size
        oh, ow = scale
        if oh == 0:
            oh = int(h * ow / w)
        elif ow == 0:
            ow = int(w * oh / h)
        im = im.resize((ow, oh))
        # from here on `scale` holds the effective (x, y) scale factors
        scale = np.array((ow / w, oh / h))
        # rescale baselines
        baselines = [(np.array(bl) * scale).astype('int').tolist()
                     for bl in baselines]
        # rescale suppl_obj
        if suppl_obj is not None:
            suppl_obj = [(np.array(bl) * scale).astype('int').tolist()
                         for bl in suppl_obj]
    else:
        # nothing was rescaled, so the output must not be divided by the
        # (possibly all-zero) scale factors at the end either
        scale = None

    if im_feats is None:
        bounds = np.array(im.size, dtype=float) - 1
        im = np.array(im.convert('L'))
        # compute image gradient
        im_feats = gaussian_filter(sobel(im), 0.5)
    else:
        bounds = np.array(im_feats.shape[::-1], dtype=float) - 1

    def _ray_intersect_boundaries(ray, direction, aabb):
        """
        Simplified version of [0] for 2d and AABB anchored at (0,0).

        [0] http://gamedev.stackexchange.com/questions/18436/most-efficient-aabb-vs-ray-collision-algorithms
        """
        dir_fraction = np.empty(2, dtype=ray.dtype)
        dir_fraction[direction == 0.0] = np.inf
        dir_fraction[direction != 0.0] = np.divide(1.0,
                                                   direction[direction != 0.0])

        t1 = (-ray[0]) * dir_fraction[0]
        t2 = (aabb[0] - ray[0]) * dir_fraction[0]
        t3 = (-ray[1]) * dir_fraction[1]
        t4 = (aabb[1] - ray[1]) * dir_fraction[1]

        tmin = max(min(t1, t2), min(t3, t4))
        tmax = min(max(t1, t2), max(t3, t4))

        # take the closest intersection that lies in ray direction
        t = min(x for x in [tmin, tmax] if x >= 0)
        return ray + (direction * t)

    def _calc_seam(baseline, polygon, angle, bias=150):
        """
        Calculates seam between baseline and ROI boundary on one side.

        Adds a baseline-distance-weighted bias to the feature map, masks
        out the bounding polygon and rotates the line so it is roughly
        level.
        """
        MASK_VAL = 99999
        r, c = draw.polygon(polygon[:, 1], polygon[:, 0])
        c_min, c_max = int(polygon[:, 0].min()), int(polygon[:, 0].max())
        r_min, r_max = int(polygon[:, 1].min()), int(polygon[:, 1].max())
        patch = im_feats[r_min:r_max + 2, c_min:c_max + 2].copy()
        # bias feature matrix by distance from baseline
        mask = np.ones_like(patch)
        for l in zip(baseline[:-1] - (c_min, r_min),
                     baseline[1:] - (c_min, r_min)):
            line_locs = draw.line(l[0][1], l[0][0], l[1][1], l[1][0])
            mask[line_locs] = 0
        dist_bias = distance_transform_cdt(mask)
        # absolute mask
        mask = np.ones_like(patch, dtype=bool)
        mask[r - r_min, c - c_min] = False
        # combine weights with features
        patch[mask] = MASK_VAL
        patch += (dist_bias * (np.mean(patch[patch != MASK_VAL]) / bias))
        extrema = baseline[(0, -1), :] - (c_min, r_min)
        # scale line image to max 600 pixel width
        scale = min(1.0, 600 / (c_max - c_min))
        tform, rotated_patch = _rotate(patch,
                                       angle,
                                       center=extrema[0],
                                       scale=scale,
                                       cval=MASK_VAL)
        # ensure to cut off padding after rotation
        x_offsets = np.sort(
            np.around(tform.inverse(extrema)[:, 0]).astype('int'))
        rotated_patch = rotated_patch[:, x_offsets[0]:x_offsets[1] + 1]
        # infinity pad for seamcarve
        rotated_patch = np.pad(rotated_patch, ((1, 1), (0, 0)),
                               mode='constant',
                               constant_values=np.inf)
        r, c = rotated_patch.shape
        # fold into shape (c, r-2 3)
        A = np.lib.stride_tricks.as_strided(
            rotated_patch, (c, r - 2, 3),
            (rotated_patch.strides[1], rotated_patch.strides[0],
             rotated_patch.strides[0]))
        B = rotated_patch[1:-1, 1:].swapaxes(0, 1)
        backtrack = np.zeros_like(B, dtype='int')
        T = np.empty((B.shape[1]), 'f')
        R = np.arange(-1, len(T) - 1)
        # accumulate minimal costs column by column; A and B are views into
        # rotated_patch, so the updates below modify it in place
        for i in np.arange(c - 1):
            A[i].min(1, T)
            backtrack[i] = A[i].argmin(1) + R
            B[i] += T
        # backtrack
        seam = []
        j = np.argmin(rotated_patch[1:-1, -1])
        for i in range(c - 2, -2, -1):
            seam.append((i + x_offsets[0] + 1, j))
            j = backtrack[i, j]
        seam = np.array(seam)[::-1]
        # clip row coordinates to one standard deviation around the mean
        seam_mean = seam[:, 1].mean()
        seam_std = seam[:, 1].std()
        seam[:, 1] = np.clip(seam[:, 1], seam_mean - seam_std,
                             seam_mean + seam_std)
        # rotate back
        seam = tform(seam).astype('int')
        # filter out seam points in masked area of original patch/in padding
        seam = seam[seam.min(axis=1) >= 0, :]
        m = (seam < mask.shape[::-1]).T
        seam = seam[np.logical_and(m[0], m[1]), :]
        seam = seam[np.invert(mask[seam.T[1], seam.T[0]])]
        seam += (c_min, r_min)
        return seam

    def _extract_patch(env_up, env_bottom, baseline, dir_vec):
        """
        Calculate a line image patch from a ROI and the original baseline.
        """
        upper_polygon = np.concatenate((baseline, env_up[::-1]))
        bottom_polygon = np.concatenate((baseline, env_bottom[::-1]))
        angle = np.arctan2(dir_vec[1], dir_vec[0])

        upper_seam = _calc_seam(baseline, upper_polygon, angle)
        bottom_seam = _calc_seam(baseline, bottom_polygon, angle)

        polygon = np.concatenate(
            ([baseline[0]], upper_seam.astype('int'), [baseline[-1]],
             bottom_seam.astype('int')[::-1]))
        return approximate_polygon(polygon, 3).tolist()

    polygons = []
    if suppl_obj is None:
        suppl_obj = []

    for idx, line in enumerate(baselines):
        try:
            # find intercepts with image bounds on each side of baseline
            line = np.array(line, dtype=float)
            # calculate magnitude-weighted average direction vector
            lengths = np.linalg.norm(np.diff(line.T), axis=0)
            p_dir = np.mean(np.diff(line.T) * lengths / lengths.sum(), axis=1)
            p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))
            # interpolate baseline
            ls = geom.LineString(line)
            ip_line = [line[0]]
            dist = 10
            while dist < ls.length:
                ip_line.append(np.array(ls.interpolate(dist)))
                dist += 10
            ip_line.append(line[-1])
            ip_line = np.array(ip_line)
            upper_bounds_intersects = []
            bottom_bounds_intersects = []
            for point in ip_line:
                upper_bounds_intersects.append(
                    _ray_intersect_boundaries(point, (p_dir * (-1, 1))[::-1],
                                              bounds + 1).astype('int'))
                bottom_bounds_intersects.append(
                    _ray_intersect_boundaries(point, (p_dir * (1, -1))[::-1],
                                              bounds + 1).astype('int'))
            # build polygon between baseline and bbox intersects
            upper_polygon = geom.Polygon(ip_line.tolist() +
                                         upper_bounds_intersects)
            bottom_polygon = geom.Polygon(ip_line.tolist() +
                                          bottom_bounds_intersects)

            # select baselines at least partially in each polygon
            side_a = [geom.LineString(upper_bounds_intersects)]
            side_b = [geom.LineString(bottom_bounds_intersects)]

            for adj_line in baselines[:idx] + baselines[idx + 1:] + suppl_obj:
                adj_line = geom.LineString(adj_line)
                if upper_polygon.intersects(adj_line):
                    side_a.append(adj_line)
                elif bottom_polygon.intersects(adj_line):
                    side_b.append(adj_line)
            side_a = unary_union(side_a).buffer(1).boundary
            side_b = unary_union(side_b).buffer(1).boundary

            def _find_closest_point(pt, intersects):
                """
                Returns the point of `intersects` that is closest to `pt`.
                """
                spt = geom.Point(pt)
                if intersects.type == 'MultiPoint':
                    return min([p for p in intersects],
                               key=lambda x: spt.distance(x))
                elif intersects.type == 'Point':
                    return intersects
                elif intersects.type == 'GeometryCollection' and len(
                        intersects) > 0:
                    t = min([p for p in intersects],
                            key=lambda x: spt.distance(x))
                    # compare the geometry type, not the geometry itself
                    if t.type == 'Point':
                        return t
                    else:
                        return nearest_points(spt, t)[1]
                else:
                    raise Exception(
                        'No intersection with boundaries. Shapely intersection object: {}'
                        .format(intersects.wkt))

            # interpolate baseline
            env_up = []
            env_bottom = []
            # find orthogonal (to linear regression) intersects with adjacent objects to complete roi
            for point, upper_bounds_intersect, bottom_bounds_intersect in zip(
                    ip_line, upper_bounds_intersects,
                    bottom_bounds_intersects):
                upper_limit = _find_closest_point(
                    point,
                    geom.LineString([point, upper_bounds_intersect
                                     ]).intersection(side_a))
                bottom_limit = _find_closest_point(
                    point,
                    geom.LineString([point, bottom_bounds_intersect
                                     ]).intersection(side_b))
                env_up.append(upper_limit.coords[0])
                env_bottom.append(bottom_limit.coords[0])
            env_up = np.array(env_up, dtype='uint')
            env_bottom = np.array(env_bottom, dtype='uint')
            polygons.append(
                _extract_patch(env_up, env_bottom, line.astype('int'), p_dir))
        except Exception as e:
            # a failing line must not abort the whole page; record it as None
            logger.warning(f'Polygonizer failed on line {idx}: {e}')
            polygons.append(None)

    # map polygons back into the coordinate system of the unscaled input
    if scale is not None:
        polygons = [
            (np.array(pol) /
             scale).astype('uint').tolist() if pol is not None else None
            for pol in polygons
        ]
    return polygons
Beispiel #11
0
def calculate_polygonal_environment(
        im: PIL.Image.Image = None,
        baselines: Sequence[Sequence[Tuple[int, int]]] = None,
        suppl_obj: Sequence[Sequence[Tuple[int, int]]] = None,
        im_feats: np.ndarray = None,
        scale: Tuple[int, int] = None,
        topline: bool = False):
    """
    Given a list of baselines and an input image, calculates a polygonal
    environment around each baseline.

    Args:
        im (PIL.Image): grayscale input image (mode 'L')
        baselines (sequence): List of lists containing a single baseline per
                              entry.
        suppl_obj (sequence): List of lists containing additional polylines
                              that should be considered hard boundaries for
                              polygonizaton purposes. Can be used to prevent
                              polygonization into non-text areas such as
                              illustrations or to compute the polygonization of
                              a subset of the lines in an image.
        im_feats (numpy.ndarray): An optional precomputed seamcarve energy map.
                                  Overrides data in `im`. The default map is
                                  `gaussian_filter(sobel(im), 0.5)`.
        scale (tuple): A 2-tuple (h, w) containing optional scale factors of
                       the input. Values of 0 are used for aspect-preserving
                       scaling. `None` (or both values being 0) skips input
                       scaling.
        topline (bool): Switch to change default baseline location for offset
                        calculation purposes. If set to False, baselines are
                        assumed to be on the bottom of the text line and will
                        be offset upwards, if set to True, baselines are on the
                        top and will be offset downwards. If set to None, no
                        offset will be applied.
    Returns:
        List of lists of coordinates. If no polygonization could be computed
        for a baseline `None` is returned instead.
    """
    if scale is not None and (scale[0] > 0 or scale[1] > 0):
        w, h = im.size
        oh, ow = scale
        if oh == 0:
            oh = int(h * ow / w)
        elif ow == 0:
            ow = int(w * oh / h)
        im = im.resize((ow, oh))
        # from here on `scale` holds the effective (x, y) scale factors
        scale = np.array((ow / w, oh / h))
        # rescale baselines
        baselines = [(np.array(bl) * scale).astype('int').tolist()
                     for bl in baselines]
        # rescale suppl_obj
        if suppl_obj is not None:
            suppl_obj = [(np.array(bl) * scale).astype('int').tolist()
                         for bl in suppl_obj]
    else:
        # nothing was rescaled, so the output must not be divided by the
        # (possibly all-zero) scale factors at the end either
        scale = None

    if im_feats is None:
        bounds = np.array(im.size, dtype=float) - 1
        im = np.array(im.convert('L'))
        # compute image gradient
        im_feats = gaussian_filter(sobel(im), 0.5)
    else:
        bounds = np.array(im_feats.shape[::-1], dtype=float) - 1

    polygons = []
    if suppl_obj is None:
        suppl_obj = []

    for idx, line in enumerate(baselines):
        try:
            end_points = (line[0], line[-1])
            line = geom.LineString(line)
            offset = default_specs.SEGMENTATION_HYPER_PARAMS[
                'line_width'] if topline is not None else 0
            offset_line = line.parallel_offset(
                offset, side='left' if topline else 'right')
            # read the vertices through `.coords`; converting a geometry
            # object directly with np.array() relies on the array interface
            # that newer Shapely releases no longer provide
            line = np.array(line.coords, dtype=float)
            offset_line = np.array(offset_line.coords, dtype=float)

            # parallel_offset on the right reverses the coordinate order
            if not topline:
                offset_line = offset_line[::-1]
            # calculate magnitude-weighted average direction vector
            lengths = np.linalg.norm(np.diff(line.T), axis=0)
            p_dir = np.mean(np.diff(line.T) * lengths / lengths.sum(), axis=1)
            p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))

            env_up, env_bottom = _calc_roi(
                line, bounds, baselines[:idx] + baselines[idx + 1:], suppl_obj,
                p_dir)

            polygons.append(
                _extract_patch(env_up, env_bottom, line.astype('int'),
                               offset_line.astype('int'), end_points, p_dir,
                               topline, offset, im_feats))
        except Exception as e:
            # a failing line must not abort the whole page; record it as None
            logger.warning(f'Polygonizer failed on line {idx}: {e}')
            polygons.append(None)

    # map polygons back into the coordinate system of the unscaled input
    if scale is not None:
        polygons = [
            (np.array(pol) /
             scale).astype('uint').tolist() if pol is not None else None
            for pol in polygons
        ]
    return polygons