Exemple #1
0
    def predict(self, image: PIL.Image.Image, top_k: int = 3) -> List[InferencerPrediction]:
        """
        Classify an image and return the best-scoring labels.

        :param image: picture to classify; it is resized to ``self.target_size``
            before being handed to ``self.classifier``
        :param top_k: maximum number of predictions to return
        :return: predictions ordered from highest to lowest confidence
        """
        # Turn the picture into a single-image batch, preprocessed with the
        # MobileNetV2 input function.
        resized = image.resize(self.target_size)
        pixels = tf.keras.preprocessing.image.img_to_array(resized)
        pixels = tf.keras.applications.mobilenet_v2.preprocess_input(pixels)
        batch = np.expand_dims(pixels, axis=0)

        # Run the model and strip the singleton dimensions from its output.
        scores = np.squeeze(self.classifier.predict(batch)).tolist()

        # Pair every label with its score and rank the pairs, best first.
        ranked = sorted(zip(self.labels, scores), key=lambda pair: pair[1], reverse=True)

        return [
            InferencerPrediction(label=label, confidence=confidence)
            for label, confidence in ranked[:top_k]
        ]
Exemple #2
0
def _resize_image_pair(
    img1: PIL.Image.Image,
    img2: PIL.Image.Image,
    trim1: bool = True,
    trim2: bool = True,
) -> typing.Tuple[PIL.Image.Image, PIL.Image.Image]:
    """Optionally trim two images, then resize both to one common size.

    The common size is the larger of the two widths by the larger of the two
    heights, measured after trimming.

    :param img1: first image
    :param img2: second image
    :param trim1: trim ``img1`` before measuring it
    :param trim2: trim ``img2`` before measuring it
    :return: the two resized images, in the order they were given
    """
    def _trim_helper(img):
        # Two trimming passes: first against a white border on the
        # colour-quantized image, then once more with _trim_image's defaults.
        quantized = _quantize_color(img, distance=15)  # hardcoded threshold
        white = PIL.ImageColor.getcolor("white", "RGBA")
        return _trim_image(_trim_image(quantized, border_color=white))

    if trim1:
        img1 = _trim_helper(img1)
    if trim2:
        img2 = _trim_helper(img2)

    common_size = (
        max(img1.width, img2.width),
        max(img1.height, img2.height),
    )
    return img1.resize(common_size), img2.resize(common_size)
Exemple #3
0
def make_power_2(img: PIL.Image.Image,
                 base: int,
                 interp_method=Image.BICUBIC) -> PIL.Image.Image:
    """Resize ``img`` so that both of its sides are multiples of ``base``.

    Each side is rounded to the nearest multiple of ``base`` (``np.round``
    sends exact halves to the even neighbour). A side that would round down
    to zero is kept at ``base`` instead, because a zero-sized image cannot be
    produced by ``resize``.

    Args:
        img: image to adjust.
        base: the divisor both output sides must be a multiple of.
        interp_method: resampling filter passed to ``img.resize``.

    Returns:
        ``img`` itself when its size already fits, otherwise a resized copy.
    """
    ow, oh = img.size
    # Clamp to one full `base`: sides shorter than base / 2 would round to 0.
    h = max(base, int(np.round(oh / base) * base))
    w = max(base, int(np.round(ow / base) * base))
    if (h == oh) and (w == ow):
        return img

    return img.resize((w, h), interp_method)
Exemple #4
0
    def preprocess_pil_image(self, pil_image: PIL.Image.Image):
        """Prepare a PIL image for the network.

        The image is converted to RGB when it is in any other mode, resized
        to ``(self.image_size_x, self.image_size_y)``, turned into an array
        with ``image.img_to_array``, given a leading batch axis and finally
        passed through ``self.preprocess_input``.

        :param pil_image: image to prepare
        :return: the result of ``self.preprocess_input`` for the
            single-image batch
        """
        # Make sure we are working with an RGB image.
        rgb_image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")

        # Scale the image to the size the network is configured for.
        resized = rgb_image.resize((self.image_size_x, self.image_size_y))

        # PIL image -> array, then add a batch axis in front.
        batch = np.expand_dims(image.img_to_array(resized), axis=0)

        # Apply the network's own preprocessing function.
        return self.preprocess_input(batch)
def postprocess_image(img: torch.Tensor,
                      target_img: PIL.Image.Image) -> PIL.Image.Image:
    """Histogram-match a network output to a target image and return it as PIL.

    Args:
        img: tensor of shape ``[1, 3, H, W]`` (enforced by the assertion).
        target_img: image whose histogram is used as the reference; the
            result is resized to this image's size.

    Returns:
        The matched image, with the same size as ``target_img``.

    Raises:
        AssertionError: if ``img`` is not a single three-channel batch or
            ``target_img`` is not a PIL image.
    """
    assert img.shape[0] == 1 and img.shape[1] == 3
    assert isinstance(target_img, PIL.Image.Image)

    # Bring the target to the tensor's spatial size for the matching step and
    # remember its own size so the result can be scaled back afterwards.
    source_size = (img.shape[3], img.shape[2])
    target_size = target_img.size
    target_img = target_img.resize(source_size, resample=PIL.Image.LANCZOS)

    # convert both source and target to numpy
    target_img_numpy = np.array(target_img)
    # detach()/cpu() make the conversion work for tensors that track
    # gradients or live on an accelerator. Indexing the batch axis (rather
    # than a bare squeeze()) keeps a height or width of 1 intact.
    # [C, H, W] -> [H, W, C], then reverse the channel order.
    img_numpy = img.detach().cpu().numpy()[0].transpose(1, 2, 0)[:, :, ::-1]

    result = histogram_matching(img_numpy, target_img_numpy)
    result_pil = PIL.Image.fromarray(result.astype(np.uint8))
    return result_pil.resize(target_size, resample=PIL.Image.LANCZOS)
Exemple #6
0
def resize_keep_aspect_ratio(
    image: PIL.Image.Image, target_width: int, target_height: int
) -> (PIL.Image.Image, str):
    """
    Resize an image without distorting it.

    The larger of the two scale factors (target width / width and
    target height / height) is applied to both sides: the side that factor
    belongs to lands exactly on its target, the other side is scaled by the
    same factor and rounded.

    :param image: original image
    :param target_width: the width we want
    :param target_height: the height we want
    :return: a tuple of the resized image and ``"width"`` or ``"height"``,
        naming the side whose scale factor was used (``"height"`` on a tie)
    """
    width_scale = target_width / image.width
    height_scale = target_height / image.height

    if width_scale > height_scale:
        resized_by = "width"
        new_size = (target_width, round(width_scale * image.height))
    else:
        resized_by = "height"
        new_size = (round(height_scale * image.width), target_height)

    return image.resize(new_size, Image.LANCZOS), resized_by
def preprocess_image(
    image: PIL.Image.Image,
    new_size: int = 256,
    mean: np.ndarray = np.array([0.40760392, 0.45795686, 0.48501961])
) -> torch.Tensor:
    """Turn a PIL image into a mean-subtracted BGR tensor of shape [1, 3, S, S].

    Args:
        image: input image; any mode is accepted and converted to RGB.
        new_size: side length ``S`` of the square the image is resized to.
        mean: per-channel mean on a 0..1 scale, subtracted from the BGR
            channels before they are scaled back to 0..255. It is only read,
            never modified.

    Returns:
        A ``float32`` tensor laid out as ``[N, C, H, W]`` with ``N == 1``.

    Raises:
        AssertionError: if ``image`` is not a PIL image.
    """
    assert isinstance(image, PIL.Image.Image)

    # split() below must yield exactly three bands; RGBA, grayscale or
    # palette images would otherwise fail to unpack into r, g, b.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # use PIL here because it resamples properly
    # (https://twitter.com/jaakkolehtinen/status/1258102168176951299)
    image = image.resize((new_size, new_size), resample=PIL.Image.LANCZOS)

    # RGB to BGR
    r, g, b = image.split()
    image_bgr = PIL.Image.merge('RGB', (b, g, r))

    # normalization: subtract the mean on a 0..1 scale, then return to 0..255
    image_numpy = np.array(image_bgr, dtype=np.float32) / 255.0
    image_numpy -= mean
    image_numpy *= 255.0

    # [H, W, C] -> [N, C, H, W]
    image_numpy = np.transpose(image_numpy, (2, 0, 1))[None, :, :, :]

    return torch.from_numpy(image_numpy).to(torch.float32)
Exemple #8
0
def resize_img(img: PIL.Image.Image, size: list) -> PIL.Image.Image:
    """Return ``img`` resized to ``size`` using Lanczos resampling.

    Args:
        img: image to resize.
        size: two-element sequence ``(width, height)``.

    Returns:
        The resized image.
    """
    # Image.ANTIALIAS was only an alias of Image.LANCZOS and was removed in
    # Pillow 10; LANCZOS selects the same filter on old and new releases.
    return img.resize(size=tuple(size), resample=Image.LANCZOS)
Exemple #9
0
 def _preprocess(query: PIL.Image.Image):
     """Convert an image into a float32 batch of one, channels first.

     The image is resized to 416x416, converted to a ``float32`` array, its
     third axis is moved to the front and a batch axis is prepended.
     """
     resized = query.resize((416, 416))
     pixels = np.array(resized).astype(np.float32)
     # Axis 2 of the array goes first (channels-first for a colour image).
     channels_first = np.rollaxis(pixels, 2, 0)
     return channels_first[None, ...]
Exemple #10
0
def calculate_polygonal_environment(
        im: PIL.Image.Image = None,
        baselines: Sequence[Sequence[Tuple[int, int]]] = None,
        suppl_obj: Sequence[Sequence[Tuple[int, int]]] = None,
        im_feats: np.ndarray = None,
        scale: Tuple[int, int] = None):
    """
    Given a list of baselines and an input image, calculates a polygonal
    environment around each baseline.

    Args:
        im (PIL.Image): grayscale input image (mode 'L')
        baselines (sequence): List of lists containing a single baseline per
                              entry.
        suppl_obj (sequence): List of lists containing additional polylines
                              that should be considered hard boundaries for
                              polygonizaton purposes. Can be used to prevent
                              polygonization into non-text areas such as
                              illustrations or to compute the polygonization of
                              a subset of the lines in an image.
        im_feats (numpy.array): An optional precomputed seamcarve energy map.
                                Overrides data in `im`. The default map is
                                `gaussian_filter(sobel(im), 0.5)`. The map is
                                used as given; it is not rescaled by `scale`.
        scale (tuple): A 2-tuple (h, w) containing optional scale factors of
                       the input. Values of 0 are used for aspect-preserving
                       scaling. `None` (or a tuple without any positive
                       value) skips input scaling.
    Returns:
        List of lists of coordinates. If no polygonization could be compute for
        a baseline `None` is returned instead.
    """
    # Remember whether the input was rescaled so the result is only scaled
    # back when there is something to undo (scale=(0, 0) rescales nothing).
    rescaled = scale is not None and (scale[0] > 0 or scale[1] > 0)
    if rescaled:
        w, h = im.size
        oh, ow = scale
        if oh == 0:
            oh = int(h * ow / w)
        elif ow == 0:
            ow = int(w * oh / h)
        im = im.resize((ow, oh))
        # from here on `scale` holds the effective (x, y) factors
        scale = np.array((ow / w, oh / h))
        # rescale baselines
        baselines = [(np.array(bl) * scale).astype('int').tolist()
                     for bl in baselines]
        # rescale suppl_obj
        if suppl_obj is not None:
            suppl_obj = [(np.array(bl) * scale).astype('int').tolist()
                         for bl in suppl_obj]

    # `np.float`/`np.bool` were removed in NumPy 1.24; the builtins are the
    # exact types those aliases pointed to.
    if im_feats is None:
        bounds = np.array(im.size, dtype=float) - 1
        im = np.array(im.convert('L'))
        # compute image gradient
        im_feats = gaussian_filter(sobel(im), 0.5)
    else:
        bounds = np.array(im_feats.shape[::-1], dtype=float) - 1

    def _ray_intersect_boundaries(ray, direction, aabb):
        """
        Simplified version of [0] for 2d and AABB anchored at (0,0).

        [0] http://gamedev.stackexchange.com/questions/18436/most-efficient-aabb-vs-ray-collision-algorithms
        """
        dir_fraction = np.empty(2, dtype=ray.dtype)
        dir_fraction[direction == 0.0] = np.inf
        dir_fraction[direction != 0.0] = np.divide(1.0,
                                                   direction[direction != 0.0])

        t1 = (-ray[0]) * dir_fraction[0]
        t2 = (aabb[0] - ray[0]) * dir_fraction[0]
        t3 = (-ray[1]) * dir_fraction[1]
        t4 = (aabb[1] - ray[1]) * dir_fraction[1]

        tmin = max(min(t1, t2), min(t3, t4))
        tmax = min(max(t1, t2), max(t3, t4))

        t = min(x for x in [tmin, tmax] if x >= 0)
        return ray + (direction * t)

    def _calc_seam(baseline, polygon, angle, bias=150):
        """
        Calculates seam between baseline and ROI boundary on one side.

        Adds a baseline-distance-weighted bias to the feature map, masks
        out the bounding polygon and rotates the line so it is roughly
        level.
        """
        MASK_VAL = 99999
        r, c = draw.polygon(polygon[:, 1], polygon[:, 0])
        c_min, c_max = int(polygon[:, 0].min()), int(polygon[:, 0].max())
        r_min, r_max = int(polygon[:, 1].min()), int(polygon[:, 1].max())
        patch = im_feats[r_min:r_max + 2, c_min:c_max + 2].copy()
        # bias feature matrix by distance from baseline
        mask = np.ones_like(patch)
        for l in zip(baseline[:-1] - (c_min, r_min),
                     baseline[1:] - (c_min, r_min)):
            line_locs = draw.line(l[0][1], l[0][0], l[1][1], l[1][0])
            mask[line_locs] = 0
        dist_bias = distance_transform_cdt(mask)
        # absolute mask
        mask = np.ones_like(patch, dtype=bool)
        mask[r - r_min, c - c_min] = False
        # combine weights with features
        patch[mask] = MASK_VAL
        patch += (dist_bias * (np.mean(patch[patch != MASK_VAL]) / bias))
        extrema = baseline[(0, -1), :] - (c_min, r_min)
        # scale line image to max 600 pixel width
        scale = min(1.0, 600 / (c_max - c_min))
        tform, rotated_patch = _rotate(patch,
                                       angle,
                                       center=extrema[0],
                                       scale=scale,
                                       cval=MASK_VAL)
        # ensure to cut off padding after rotation
        x_offsets = np.sort(
            np.around(tform.inverse(extrema)[:, 0]).astype('int'))
        rotated_patch = rotated_patch[:, x_offsets[0]:x_offsets[1] + 1]
        # infinity pad for seamcarve
        rotated_patch = np.pad(rotated_patch, ((1, 1), (0, 0)),
                               mode='constant',
                               constant_values=np.inf)
        r, c = rotated_patch.shape
        # fold into shape (c, r-2 3)
        A = np.lib.stride_tricks.as_strided(
            rotated_patch, (c, r - 2, 3),
            (rotated_patch.strides[1], rotated_patch.strides[0],
             rotated_patch.strides[0]))
        B = rotated_patch[1:-1, 1:].swapaxes(0, 1)
        backtrack = np.zeros_like(B, dtype='int')
        T = np.empty((B.shape[1]), 'f')
        R = np.arange(-1, len(T) - 1)
        for i in np.arange(c - 1):
            A[i].min(1, T)
            backtrack[i] = A[i].argmin(1) + R
            B[i] += T
        # backtrack
        seam = []
        j = np.argmin(rotated_patch[1:-1, -1])
        for i in range(c - 2, -2, -1):
            seam.append((i + x_offsets[0] + 1, j))
            j = backtrack[i, j]
        seam = np.array(seam)[::-1]
        seam_mean = seam[:, 1].mean()
        seam_std = seam[:, 1].std()
        seam[:, 1] = np.clip(seam[:, 1], seam_mean - seam_std,
                             seam_mean + seam_std)
        # rotate back
        seam = tform(seam).astype('int')
        # filter out seam points in masked area of original patch/in padding
        seam = seam[seam.min(axis=1) >= 0, :]
        m = (seam < mask.shape[::-1]).T
        seam = seam[np.logical_and(m[0], m[1]), :]
        seam = seam[np.invert(mask[seam.T[1], seam.T[0]])]
        seam += (c_min, r_min)
        return seam

    def _extract_patch(env_up, env_bottom, baseline, dir_vec):
        """
        Calculate a line image patch from a ROI and the original baseline.
        """
        upper_polygon = np.concatenate((baseline, env_up[::-1]))
        bottom_polygon = np.concatenate((baseline, env_bottom[::-1]))
        angle = np.arctan2(dir_vec[1], dir_vec[0])

        upper_seam = _calc_seam(baseline, upper_polygon, angle)
        bottom_seam = _calc_seam(baseline, bottom_polygon, angle)

        polygon = np.concatenate(
            ([baseline[0]], upper_seam.astype('int'), [baseline[-1]],
             bottom_seam.astype('int')[::-1]))
        return approximate_polygon(polygon, 3).tolist()

    def _find_closest_point(pt, intersects):
        """
        Returns the point of the intersection geometry closest to `pt`.

        Multi-part geometries are accessed through `.geoms` and the geometry
        kind through `.geom_type`, which work on Shapely 1.x and 2.x alike.
        Raises an Exception when the intersection is empty or of another kind.
        """
        spt = geom.Point(pt)
        if intersects.geom_type == 'MultiPoint':
            return min(intersects.geoms, key=spt.distance)
        elif intersects.geom_type == 'Point':
            return intersects
        elif intersects.geom_type == 'GeometryCollection' and len(
                intersects.geoms) > 0:
            t = min(intersects.geoms, key=spt.distance)
            if t.geom_type == 'Point':
                return t
            else:
                return nearest_points(spt, t)[1]
        else:
            raise Exception(
                'No intersection with boundaries. Shapely intersection object: {}'
                .format(intersects.wkt))

    polygons = []
    if suppl_obj is None:
        suppl_obj = []

    for idx, line in enumerate(baselines):
        try:
            # find intercepts with image bounds on each side of baseline
            line = np.array(line, dtype=float)
            # calculate magnitude-weighted average direction vector
            lengths = np.linalg.norm(np.diff(line.T), axis=0)
            p_dir = np.mean(np.diff(line.T) * lengths / lengths.sum(), axis=1)
            p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))
            # interpolate baseline in steps of 10 along its length
            ls = geom.LineString(line)
            ip_line = [line[0]]
            dist = 10
            while dist < ls.length:
                # read the coordinates explicitly; Shapely 2 geometries no
                # longer convert to coordinate arrays through np.array()
                ip_line.append(np.array(ls.interpolate(dist).coords[0]))
                dist += 10
            ip_line.append(line[-1])
            ip_line = np.array(ip_line)
            upper_bounds_intersects = []
            bottom_bounds_intersects = []
            for point in ip_line:
                upper_bounds_intersects.append(
                    _ray_intersect_boundaries(point, (p_dir * (-1, 1))[::-1],
                                              bounds + 1).astype('int'))
                bottom_bounds_intersects.append(
                    _ray_intersect_boundaries(point, (p_dir * (1, -1))[::-1],
                                              bounds + 1).astype('int'))
            # build polygon between baseline and bbox intersects
            upper_polygon = geom.Polygon(ip_line.tolist() +
                                         upper_bounds_intersects)
            bottom_polygon = geom.Polygon(ip_line.tolist() +
                                          bottom_bounds_intersects)

            # select baselines at least partially in each polygon
            side_a = [geom.LineString(upper_bounds_intersects)]
            side_b = [geom.LineString(bottom_bounds_intersects)]

            for adj_line in baselines[:idx] + baselines[idx + 1:] + suppl_obj:
                adj_line = geom.LineString(adj_line)
                if upper_polygon.intersects(adj_line):
                    side_a.append(adj_line)
                elif bottom_polygon.intersects(adj_line):
                    side_b.append(adj_line)
            side_a = unary_union(side_a).buffer(1).boundary
            side_b = unary_union(side_b).buffer(1).boundary

            # envelope points above and below the interpolated baseline
            env_up = []
            env_bottom = []
            # find orthogonal (to linear regression) intersects with adjacent objects to complete roi
            for point, upper_bounds_intersect, bottom_bounds_intersect in zip(
                    ip_line, upper_bounds_intersects,
                    bottom_bounds_intersects):
                upper_limit = _find_closest_point(
                    point,
                    geom.LineString([point, upper_bounds_intersect
                                     ]).intersection(side_a))
                bottom_limit = _find_closest_point(
                    point,
                    geom.LineString([point, bottom_bounds_intersect
                                     ]).intersection(side_b))
                env_up.append(upper_limit.coords[0])
                env_bottom.append(bottom_limit.coords[0])
            env_up = np.array(env_up, dtype='uint')
            env_bottom = np.array(env_bottom, dtype='uint')
            polygons.append(
                _extract_patch(env_up, env_bottom, line.astype('int'), p_dir))
        except Exception as e:
            # best effort: a failing line yields None instead of aborting
            logger.warning(f'Polygonizer failed on line {idx}: {e}')
            polygons.append(None)

    # map the polygons back into the coordinate system of the original input
    if rescaled:
        polygons = [
            (np.array(pol) /
             scale).astype('uint').tolist() if pol is not None else None
            for pol in polygons
        ]
    return polygons
def convert_pilimage_to_nparray(pil_image: PIL.Image.Image) -> np.ndarray:
    """Convert a PIL image into a ``(1, 150, 150, 3)`` array scaled to 0..1.

    Args:
        pil_image: input image; any mode is accepted and converted to RGB.

    Returns:
        The resized image as a single-item batch with pixel values divided
        by 255.
    """
    NUM_RGB = 3
    IMG_WIDTH, IMG_HEIGHT = 150, 150
    SCALE = 255
    # Force three channels: grayscale, palette or RGBA images would not fit
    # the reshape below.
    if pil_image.mode != "RGB":
        pil_image = pil_image.convert("RGB")
    np_image = np.array(pil_image.resize((IMG_WIDTH, IMG_HEIGHT))) / SCALE
    return np_image.reshape((1, IMG_WIDTH, IMG_HEIGHT, NUM_RGB))
Exemple #12
0
def calculate_polygonal_environment(
        im: PIL.Image.Image = None,
        baselines: Sequence[Sequence[Tuple[int, int]]] = None,
        suppl_obj: Sequence[Sequence[Tuple[int, int]]] = None,
        im_feats: np.ndarray = None,
        scale: Tuple[int, int] = None,
        topline: bool = False):
    """
    Given a list of baselines and an input image, calculates a polygonal
    environment around each baseline.

    Args:
        im (PIL.Image): grayscale input image (mode 'L')
        baselines (sequence): List of lists containing a single baseline per
                              entry.
        suppl_obj (sequence): List of lists containing additional polylines
                              that should be considered hard boundaries for
                              polygonizaton purposes. Can be used to prevent
                              polygonization into non-text areas such as
                              illustrations or to compute the polygonization of
                              a subset of the lines in an image.
        im_feats (numpy.array): An optional precomputed seamcarve energy map.
                                Overrides data in `im`. The default map is
                                `gaussian_filter(sobel(im), 0.5)`. The map is
                                used as given; it is not rescaled by `scale`.
        scale (tuple): A 2-tuple (h, w) containing optional scale factors of
                       the input. Values of 0 are used for aspect-preserving
                       scaling. `None` (or a tuple without any positive
                       value) skips input scaling.
        topline (bool): Switch to change default baseline location for offset
                        calculation purposes. If set to False, baselines are
                        assumed to be on the bottom of the text line and will
                        be offset upwards, if set to True, baselines are on the
                        top and will be offset downwards. If set to None, no
                        offset will be applied.
    Returns:
        List of lists of coordinates. If no polygonization could be compute for
        a baseline `None` is returned instead.
    """
    # Remember whether the input was rescaled so the result is only scaled
    # back when there is something to undo (scale=(0, 0) rescales nothing).
    rescaled = scale is not None and (scale[0] > 0 or scale[1] > 0)
    if rescaled:
        w, h = im.size
        oh, ow = scale
        if oh == 0:
            oh = int(h * ow / w)
        elif ow == 0:
            ow = int(w * oh / h)
        im = im.resize((ow, oh))
        # from here on `scale` holds the effective (x, y) factors
        scale = np.array((ow / w, oh / h))
        # rescale baselines
        baselines = [(np.array(bl) * scale).astype('int').tolist()
                     for bl in baselines]
        # rescale suppl_obj
        if suppl_obj is not None:
            suppl_obj = [(np.array(bl) * scale).astype('int').tolist()
                         for bl in suppl_obj]

    if im_feats is None:
        bounds = np.array(im.size, dtype=float) - 1
        im = np.array(im.convert('L'))
        # compute image gradient
        im_feats = gaussian_filter(sobel(im), 0.5)
    else:
        bounds = np.array(im_feats.shape[::-1], dtype=float) - 1

    polygons = []
    if suppl_obj is None:
        suppl_obj = []

    for idx, line in enumerate(baselines):
        try:
            end_points = (line[0], line[-1])
            line = geom.LineString(line)
            offset = default_specs.SEGMENTATION_HYPER_PARAMS[
                'line_width'] if topline is not None else 0
            offset_line = line.parallel_offset(
                offset, side='left' if topline else 'right')
            # Read the vertices through `.coords`: Shapely 2 geometries no
            # longer convert to coordinate arrays through np.array(), while
            # `.coords` yields the same Nx2 data on Shapely 1.x and 2.x.
            line = np.array(line.coords, dtype=float)
            offset_line = np.array(offset_line.coords, dtype=float)

            # parallel_offset on the right reverses the coordinate order
            if not topline:
                offset_line = offset_line[::-1]
            # calculate magnitude-weighted average direction vector
            lengths = np.linalg.norm(np.diff(line.T), axis=0)
            p_dir = np.mean(np.diff(line.T) * lengths / lengths.sum(), axis=1)
            p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))

            # all other baselines plus the supplementary objects act as
            # neighbours when the region of interest is computed
            env_up, env_bottom = _calc_roi(
                line, bounds, baselines[:idx] + baselines[idx + 1:], suppl_obj,
                p_dir)

            polygons.append(
                _extract_patch(env_up, env_bottom, line.astype('int'),
                               offset_line.astype('int'), end_points, p_dir,
                               topline, offset, im_feats))
        except Exception as e:
            # best effort: a failing line yields None instead of aborting
            logger.warning(f'Polygonizer failed on line {idx}: {e}')
            polygons.append(None)

    # map the polygons back into the coordinate system of the original input
    if rescaled:
        polygons = [
            (np.array(pol) /
             scale).astype('uint').tolist() if pol is not None else None
            for pol in polygons
        ]
    return polygons