def convert_image_to_array(image: PIL.Image.Image) -> np.ndarray:
    """Convert a PIL image into a ``(height, width, 3)`` uint8 RGB array.

    Args:
        image: Image to convert. Images in any mode other than ``"RGB"`` are
            converted to RGB first.

    Returns:
        A new ``np.uint8`` array with one row per image line and the three
        colour channels on the last axis.
    """
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Read the pixel buffer directly instead of going through
    # ``image.getdata()``, which materialises one Python tuple per pixel
    # before NumPy can copy it. For an RGB image the result already has the
    # (height, width, 3) layout, so no reshape is needed.
    return np.array(image, dtype=np.uint8)
def add_shapes(
    background: PIL.Image.Image,
    shape_img: PIL.Image.Image,
    shape_params,
) -> PIL.Image.Image:
    """Paste a shape onto the background and return the crop around it.

    For every entry of ``shape_params`` the shape is alpha-composited onto
    ``background`` (which is modified in place by ``paste``), after which the
    background is replaced by a randomly enlarged crop around the shape and
    smoothed.

    Args:
        background: Image the shape is pasted onto.
        shape_img: Shape image; it is also used as its own paste mask, so it
            presumably carries an alpha channel -- TODO confirm.
        shape_params: Iterable of parameter sequences whose last two items
            are the ``x`` and ``y`` paste offsets.

    Returns:
        The cropped, smoothed image converted to RGB. No bounding boxes are
        returned; the previous annotation and docstring claimed otherwise.
    """
    for shape_param in shape_params:
        x = shape_param[-2]
        y = shape_param[-1]
        x1, y1, x2, y2 = shape_img.getbbox()

        bg_at_shape = background.crop((x1 + x, y1 + y, x2 + x, y2 + y))
        bg_at_shape.paste(shape_img, (0, 0), shape_img)
        background.paste(bg_at_shape, (x, y))

        # Slightly expand the bounding box in order to simulate variability
        # with the detection boxes. Always make the crop larger than needed
        # because training augmentations will only be able to crop down.
        dx = random.randint(0, int(0.1 * (x2 - x1)))
        dy = random.randint(0, int(0.1 * (y2 - y1)))
        x1 -= dx
        x2 += dx
        y1 -= dy
        y2 += dy

        # NOTE(review): the background is replaced by the crop inside the
        # loop, so any further entry of ``shape_params`` is applied to the
        # already cropped image -- presumably callers pass a single entry.
        background = background.crop((x1 + x, y1 + y, x2 + x, y2 + y))
        background = background.filter(ImageFilter.SMOOTH_MORE)

    return background.convert("RGB")
def _binarize_image(img: PIL.Image.Image, threshold: float) -> PIL.Image.Image:
    """Return a black-and-white version of ``img``.

    The image is converted to 8-bit greyscale (mode ``"L"``); every pixel
    whose grey value is below ``threshold`` becomes 0 and every other pixel
    becomes 255.

    Args:
        img: Source image. It is not modified.
        threshold: Grey level at which pixels switch from black to white.

    Returns:
        A new mode ``"L"`` image containing only the values 0 and 255.
    """
    # ``point`` evaluates the callable once per possible grey level and then
    # applies the resulting lookup table to the whole image, replacing a
    # Python-level getpixel/putpixel call pair for every single pixel.
    return img.convert("L").point(
        lambda value: 0 if value < threshold else 255
    )
def preprocess_pil_image(self, pil_image: PIL.Image.Image):
    """Post-process an image given in PIL format.

    The image is converted to RGB when necessary, resized to
    ``(self.image_size_x, self.image_size_y)``, turned into an array with
    ``image.img_to_array``, given a leading axis of length one and finally
    passed through ``self.preprocess_input``.
    """
    # Convert the image to RGB unless it is already in that mode.
    rgb_image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")

    # Resize the image to the size expected by the network.
    target_size = (self.image_size_x, self.image_size_y)
    resized = rgb_image.resize(target_size)

    # Turn the PIL image into a NumPy array and add a leading axis.
    pixels = image.img_to_array(resized)
    batch = np.expand_dims(pixels, axis=0)

    # Run the network's own preprocessing function on the result.
    return self.preprocess_input(batch)
def set_image_mpl_cmap(image: PIL.Image.Image, cmap: str):
    """
    Return an RGB copy of a PIL.Image coloured with a matplotlib colour map

    The colour map is sampled at 256 evenly spaced levels between 0 and 1
    and the samples are installed as the palette of a copy of the image.
    The input image itself is left untouched.

    See https://matplotlib.org/3.1.1/gallery/color/colormap_reference.html

    Args:
        image: Image to recolour
        cmap: Matplotlib colour map name
    """
    colour_map = plt.get_cmap(cmap)
    levels = numpy.linspace(0, 1, 2**8)

    # Keep only the first three columns of the converted samples; the
    # palette is installed in "RGB" raw mode.
    sampled = to_bytes(colour_map(levels))
    palette = sampled[:, 0:3]

    recoloured = image.copy()
    recoloured.putpalette(palette, "RGB")
    return recoloured.convert("RGB")
def prepare_img_for_numpy(img: PIL.Image.Image) -> PIL.Image.Image:
    """
    Preparing image for transforming into numpy ndarray.

    The image is converted to RGB mode; the input image is not modified.

    Parameters:
        img (PIL.Image.Image) : image to be prepared.

    Returns:
        prepared_img (PIL.Image.Image) : prepared image in RGB mode.

    Raises:
        TypeError : if img is not a PIL.Image.Image instance.
    """
    if not isinstance(img, PIL.Image.Image):
        raise TypeError(
            "prepare_img_for_numpy: expected image of type PIL.Image.Image, got {0}"
            .format(type(img)))

    return img.convert("RGB")
def apply_blur(img: PIL.Image.Image,
               blur_radius: int = BLUR_blur_radius) -> PIL.Image.Image:
    """
    Blur a PIL.Image image with a Gaussian filter.

    The image is converted to RGBA mode before the filter is applied; the
    input image is not modified.

    Parameters:
        img (PIL.Image) : image to be blurred.
        blur_radius (int) : radius of the Gaussian blur, i.e. blur strength.

    Returns:
        blurred_image (PIL.Image) : blurred RGBA image.

    Raises:
        TypeError : if img is not a PIL.Image.Image instance.
    """
    if isinstance(img, PIL.Image.Image):
        rgba_image = img.convert("RGBA")
        return rgba_image.filter(ImageFilter.GaussianBlur(blur_radius))

    raise TypeError(
        "apply_blur: expected img of type PIL.Image, got {0}".format(
            type(img)))
def add_shapes(
    background: PIL.Image.Image,
    shape_imgs: List[PIL.Image.Image],
    shape_params,
    blur_radius: int,
) -> Tuple[List[Tuple[int, float, float, float, float]], PIL.Image.Image]:
    """Paste shapes onto background and return bboxes

    Each shape image is blurred, alpha-composited onto ``background`` at the
    offset stored in its parameters (``background`` is modified in place by
    ``paste``), and a bounding box normalised by the background size is
    recorded for it.

    Args:
        background: Image the shapes are pasted onto.
        shape_imgs: One shape image per entry of ``shape_params``, looked up
            by position. Each image is also used as its own paste mask.
        shape_params: Iterable of parameter sequences. Item ``0`` is looked
            up in ``CLASSES``; the last two items are the ``x`` and ``y``
            paste offsets in pixels.
        blur_radius: Currently unused; the shapes are always blurred with a
            fixed Gaussian radius of 1.

    Returns:
        A tuple ``(shape_bboxes, image)`` where every bounding box is
        ``(class_index, x, y, w, h)`` with ``x``/``w`` divided by the
        background width and ``y``/``h`` divided by the background height,
        and ``image`` is the composited background converted to RGB.
    """
    shape_bboxes: List[Tuple[int, float, float, float, float]] = []

    # ``paste`` modifies the background in place without resizing it, so the
    # size can be read once up front.
    im_w, im_h = background.size

    for i, shape_param in enumerate(shape_params):
        x = shape_param[-2]
        y = shape_param[-1]
        shape_img = shape_imgs[i].filter(ImageFilter.GaussianBlur(1))
        x1, y1, x2, y2 = shape_img.getbbox()

        # NOTE(review): the crop starts at (x1 + x, y1 + y) but the shape is
        # pasted at (0, 0) and the result at (x, y); this lines up only when
        # the shape's bounding box starts at (0, 0) -- confirm with the
        # shape generator.
        bg_at_shape = background.crop((x1 + x, y1 + y, x2 + x, y2 + y))
        bg_at_shape.paste(shape_img, (0, 0), shape_img)
        bg_at_shape = bg_at_shape.filter(ImageFilter.SMOOTH_MORE)
        background.paste(bg_at_shape, (x, y))

        shape_bboxes.append(
            (
                CLASSES.index(shape_param[0]),
                x / im_w,
                y / im_h,
                (x2 - x1) / im_w,
                (y2 - y1) / im_h,
            )
        )

    return shape_bboxes, background.convert("RGB")
def apply_palette_reduction(
        img: PIL.Image.Image,
        reduced_palette_colors_count: int = REDUCED_PALETTE_COLORS_COUNT
) -> PIL.Image.Image:
    """
    Reduce the number of colors of a PIL.Image image.

    The image is converted to palette mode ('P') with an adaptive palette
    limited to the requested number of colors; the input is not modified.

    Parameters:
        img (PIL.Image) : image to be processed.
        reduced_palette_colors_count (int) : count of colors for processed image.

    Returns:
        processed_image (PIL.Image) : processed image with palette count reduced.

    Raises:
        TypeError : if img is not a PIL.Image.Image instance.
    """
    if isinstance(img, PIL.Image.Image):
        return img.convert('P',
                           palette=Image.ADAPTIVE,
                           colors=reduced_palette_colors_count)

    raise TypeError(
        "apply_palette_reduction: expected img of type PIL.Image, got {0}".
        format(type(img)))
def calculate_polygonal_environment(
        im: PIL.Image.Image = None,
        baselines: Sequence[Sequence[Tuple[int, int]]] = None,
        suppl_obj: Sequence[Sequence[Tuple[int, int]]] = None,
        im_feats: np.ndarray = None,
        scale: Tuple[int, int] = None):
    """
    Given a list of baselines and an input image, calculates a polygonal
    environment around each baseline.

    Args:
        im (PIL.Image): grayscale input image (mode 'L')
        baselines (sequence): List of lists containing a single baseline per
                              entry.
        suppl_obj (sequence): List of lists containing additional polylines
                              that should be considered hard boundaries for
                              polygonizaton purposes. Can be used to prevent
                              polygonization into non-text areas such as
                              illustrations or to compute the polygonization
                              of a subset of the lines in an image.
        im_feats (numpy.array): An optional precomputed seamcarve energy map.
                                Overrides data in `im`. The default map is
                                `gaussian_filter(sobel(im), 0.5)`.
        scale (tuple): A 2-tuple (h, w) containing optional scale factors of
                       the input. Values of 0 are used for aspect-preserving
                       scaling. `None` (or both values 0) skips input
                       scaling.

    Returns:
        List of lists of coordinates. If no polygonization could be compute
        for a baseline `None` is returned instead.
    """
    # Remember whether the input is actually rescaled so the output is only
    # mapped back in that case. Previously a scale of (0, 0) skipped the
    # resize but still divided the results by zero.
    rescaled = scale is not None and (scale[0] > 0 or scale[1] > 0)
    if rescaled:
        w, h = im.size
        oh, ow = scale
        if oh == 0:
            oh = int(h * ow / w)
        elif ow == 0:
            ow = int(w * oh / h)
        im = im.resize((ow, oh))
        scale = np.array((ow / w, oh / h))
        # rescale baselines
        baselines = [(np.array(bl) * scale).astype('int').tolist()
                     for bl in baselines]
        # rescale suppl_obj
        if suppl_obj is not None:
            suppl_obj = [(np.array(bl) * scale).astype('int').tolist()
                         for bl in suppl_obj]

    # The builtin ``float``/``bool`` are used as dtypes below: the
    # ``np.float``/``np.bool`` aliases were removed from NumPy.
    if im_feats is None:
        bounds = np.array(im.size, dtype=float) - 1
        im = np.array(im.convert('L'))
        # compute image gradient
        im_feats = gaussian_filter(sobel(im), 0.5)
    else:
        bounds = np.array(im_feats.shape[::-1], dtype=float) - 1

    def _ray_intersect_boundaries(ray, direction, aabb):
        """
        Simplified version of [0] for 2d and AABB anchored at (0,0).

        [0] http://gamedev.stackexchange.com/questions/18436/most-efficient-aabb-vs-ray-collision-algorithms
        """
        dir_fraction = np.empty(2, dtype=ray.dtype)
        dir_fraction[direction == 0.0] = np.inf
        dir_fraction[direction != 0.0] = np.divide(
            1.0, direction[direction != 0.0])

        t1 = (-ray[0]) * dir_fraction[0]
        t2 = (aabb[0] - ray[0]) * dir_fraction[0]
        t3 = (-ray[1]) * dir_fraction[1]
        t4 = (aabb[1] - ray[1]) * dir_fraction[1]

        tmin = max(min(t1, t2), min(t3, t4))
        tmax = min(max(t1, t2), max(t3, t4))

        t = min(x for x in [tmin, tmax] if x >= 0)
        return ray + (direction * t)

    def _calc_seam(baseline, polygon, angle, bias=150):
        """
        Calculates seam between baseline and ROI boundary on one side.

        Adds a baseline-distance-weighted bias to the feature map, masks out
        the bounding polygon and rotates the line so it is roughly level.
        """
        MASK_VAL = 99999
        r, c = draw.polygon(polygon[:, 1], polygon[:, 0])
        c_min, c_max = int(polygon[:, 0].min()), int(polygon[:, 0].max())
        r_min, r_max = int(polygon[:, 1].min()), int(polygon[:, 1].max())
        patch = im_feats[r_min:r_max + 2, c_min:c_max + 2].copy()
        # bias feature matrix by distance from baseline
        mask = np.ones_like(patch)
        for l in zip(baseline[:-1] - (c_min, r_min),
                     baseline[1:] - (c_min, r_min)):
            line_locs = draw.line(l[0][1], l[0][0], l[1][1], l[1][0])
            mask[line_locs] = 0
        dist_bias = distance_transform_cdt(mask)
        # absolute mask
        mask = np.ones_like(patch, dtype=bool)
        mask[r - r_min, c - c_min] = False
        # combine weights with features
        patch[mask] = MASK_VAL
        patch += (dist_bias * (np.mean(patch[patch != MASK_VAL]) / bias))
        extrema = baseline[(0, -1), :] - (c_min, r_min)
        # scale line image to max 600 pixel width
        scale = min(1.0, 600 / (c_max - c_min))
        tform, rotated_patch = _rotate(patch,
                                       angle,
                                       center=extrema[0],
                                       scale=scale,
                                       cval=MASK_VAL)
        # ensure to cut off padding after rotation
        x_offsets = np.sort(
            np.around(tform.inverse(extrema)[:, 0]).astype('int'))
        rotated_patch = rotated_patch[:, x_offsets[0]:x_offsets[1] + 1]
        # infinity pad for seamcarve
        rotated_patch = np.pad(rotated_patch, ((1, 1), (0, 0)),
                               mode='constant',
                               constant_values=np.inf)
        r, c = rotated_patch.shape
        # fold into shape (c, r-2 3)
        A = np.lib.stride_tricks.as_strided(
            rotated_patch, (c, r - 2, 3),
            (rotated_patch.strides[1], rotated_patch.strides[0],
             rotated_patch.strides[0]))
        B = rotated_patch[1:-1, 1:].swapaxes(0, 1)
        backtrack = np.zeros_like(B, dtype='int')
        T = np.empty((B.shape[1]), 'f')
        R = np.arange(-1, len(T) - 1)
        for i in np.arange(c - 1):
            A[i].min(1, T)
            backtrack[i] = A[i].argmin(1) + R
            B[i] += T
        # backtrack
        seam = []
        j = np.argmin(rotated_patch[1:-1, -1])
        for i in range(c - 2, -2, -1):
            seam.append((i + x_offsets[0] + 1, j))
            j = backtrack[i, j]
        seam = np.array(seam)[::-1]
        seam_mean = seam[:, 1].mean()
        seam_std = seam[:, 1].std()
        seam[:, 1] = np.clip(seam[:, 1], seam_mean - seam_std,
                             seam_mean + seam_std)
        # rotate back
        seam = tform(seam).astype('int')
        # filter out seam points in masked area of original patch/in padding
        seam = seam[seam.min(axis=1) >= 0, :]
        m = (seam < mask.shape[::-1]).T
        seam = seam[np.logical_and(m[0], m[1]), :]
        seam = seam[np.invert(mask[seam.T[1], seam.T[0]])]
        seam += (c_min, r_min)
        return seam

    def _extract_patch(env_up, env_bottom, baseline, dir_vec):
        """
        Calculate a line image patch from a ROI and the original baseline.
        """
        upper_polygon = np.concatenate((baseline, env_up[::-1]))
        bottom_polygon = np.concatenate((baseline, env_bottom[::-1]))
        angle = np.arctan2(dir_vec[1], dir_vec[0])

        upper_seam = _calc_seam(baseline, upper_polygon, angle)
        bottom_seam = _calc_seam(baseline, bottom_polygon, angle)

        polygon = np.concatenate(
            ([baseline[0]], upper_seam.astype('int'), [baseline[-1]],
             bottom_seam.astype('int')[::-1]))
        return approximate_polygon(polygon, 3).tolist()

    def _find_closest_point(pt, intersects):
        """
        Returns the point of the intersection geometry closest to `pt`.

        Raises an exception if the intersection holds neither a point, a
        multi-point nor a non-empty geometry collection.
        """
        spt = geom.Point(pt)
        # `geom_type`/`.geoms` are used instead of `.type`, direct iteration
        # and len() of multi-part geometries, which Shapely 2 dropped.
        if intersects.geom_type == 'MultiPoint':
            return min(intersects.geoms, key=lambda x: spt.distance(x))
        elif intersects.geom_type == 'Point':
            return intersects
        elif (intersects.geom_type == 'GeometryCollection'
              and len(intersects.geoms) > 0):
            t = min(intersects.geoms, key=lambda x: spt.distance(x))
            # compare the geometry type, not the geometry itself, to 'Point'
            if t.geom_type == 'Point':
                return t
            else:
                return nearest_points(spt, t)[1]
        else:
            raise Exception(
                'No intersection with boundaries. Shapely intersection object: {}'
                .format(intersects.wkt))

    polygons = []
    if suppl_obj is None:
        suppl_obj = []

    for idx, line in enumerate(baselines):
        try:
            # find intercepts with image bounds on each side of baseline
            line = np.array(line, dtype=float)
            # calculate magnitude-weighted average direction vector
            lengths = np.linalg.norm(np.diff(line.T), axis=0)
            p_dir = np.mean(np.diff(line.T) * lengths / lengths.sum(), axis=1)
            p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))
            # interpolate baseline
            ls = geom.LineString(line)
            ip_line = [line[0]]
            dist = 10
            while dist < ls.length:
                # read the coordinates explicitly; shapely points no longer
                # convert to arrays implicitly
                ip_line.append(np.array(ls.interpolate(dist).coords[0]))
                dist += 10
            ip_line.append(line[-1])
            ip_line = np.array(ip_line)
            upper_bounds_intersects = []
            bottom_bounds_intersects = []
            for point in ip_line:
                upper_bounds_intersects.append(
                    _ray_intersect_boundaries(point,
                                              (p_dir * (-1, 1))[::-1],
                                              bounds + 1).astype('int'))
                bottom_bounds_intersects.append(
                    _ray_intersect_boundaries(point,
                                              (p_dir * (1, -1))[::-1],
                                              bounds + 1).astype('int'))
            # build polygon between baseline and bbox intersects
            upper_polygon = geom.Polygon(ip_line.tolist() +
                                         upper_bounds_intersects)
            bottom_polygon = geom.Polygon(ip_line.tolist() +
                                          bottom_bounds_intersects)

            # select baselines at least partially in each polygon
            side_a = [geom.LineString(upper_bounds_intersects)]
            side_b = [geom.LineString(bottom_bounds_intersects)]
            for adj_line in baselines[:idx] + baselines[idx + 1:] + suppl_obj:
                adj_line = geom.LineString(adj_line)
                if upper_polygon.intersects(adj_line):
                    side_a.append(adj_line)
                elif bottom_polygon.intersects(adj_line):
                    side_b.append(adj_line)
            side_a = unary_union(side_a).buffer(1).boundary
            side_b = unary_union(side_b).buffer(1).boundary

            # interpolate baseline
            env_up = []
            env_bottom = []
            # find orthogonal (to linear regression) intersects with adjacent
            # objects to complete roi
            for point, upper_bounds_intersect, bottom_bounds_intersect in zip(
                    ip_line, upper_bounds_intersects,
                    bottom_bounds_intersects):
                upper_limit = _find_closest_point(
                    point,
                    geom.LineString([point, upper_bounds_intersect
                                     ]).intersection(side_a))
                bottom_limit = _find_closest_point(
                    point,
                    geom.LineString([point, bottom_bounds_intersect
                                     ]).intersection(side_b))
                env_up.append(upper_limit.coords[0])
                env_bottom.append(bottom_limit.coords[0])
            env_up = np.array(env_up, dtype='uint')
            env_bottom = np.array(env_bottom, dtype='uint')
            polygons.append(
                _extract_patch(env_up, env_bottom, line.astype('int'), p_dir))
        except Exception as e:
            # best effort: a failing line yields `None` instead of aborting
            # the whole page
            logger.warning(f'Polygonizer failed on line {idx}: {e}')
            polygons.append(None)

    # map the polygons back into the coordinate system of the unscaled input
    if rescaled:
        polygons = [
            (np.array(pol) / scale).astype('uint').tolist()
            if pol is not None else None for pol in polygons
        ]
    return polygons
def calculate_polygonal_environment(
        im: PIL.Image.Image = None,
        baselines: Sequence[Sequence[Tuple[int, int]]] = None,
        suppl_obj: Sequence[Sequence[Tuple[int, int]]] = None,
        im_feats: np.ndarray = None,
        scale: Tuple[int, int] = None,
        topline: bool = False):
    """
    Given a list of baselines and an input image, calculates a polygonal
    environment around each baseline.

    Args:
        im (PIL.Image): grayscale input image (mode 'L')
        baselines (sequence): List of lists containing a single baseline per
                              entry.
        suppl_obj (sequence): List of lists containing additional polylines
                              that should be considered hard boundaries for
                              polygonizaton purposes. Can be used to prevent
                              polygonization into non-text areas such as
                              illustrations or to compute the polygonization
                              of a subset of the lines in an image.
        im_feats (numpy.array): An optional precomputed seamcarve energy map.
                                Overrides data in `im`. The default map is
                                `gaussian_filter(sobel(im), 0.5)`.
        scale (tuple): A 2-tuple (h, w) containing optional scale factors of
                       the input. Values of 0 are used for aspect-preserving
                       scaling. `None` (or both values 0) skips input
                       scaling.
        topline (bool): Switch to change default baseline location for offset
                        calculation purposes. If set to False, baselines are
                        assumed to be on the bottom of the text line and will
                        be offset upwards, if set to True, baselines are on
                        the top and will be offset downwards. If set to None,
                        no offset will be applied.

    Returns:
        List of lists of coordinates. If no polygonization could be compute
        for a baseline `None` is returned instead.
    """
    # Remember whether the input is actually rescaled so the output is only
    # mapped back in that case. Previously a scale of (0, 0) skipped the
    # resize but still divided the results by zero.
    rescaled = scale is not None and (scale[0] > 0 or scale[1] > 0)
    if rescaled:
        w, h = im.size
        oh, ow = scale
        if oh == 0:
            oh = int(h * ow / w)
        elif ow == 0:
            ow = int(w * oh / h)
        im = im.resize((ow, oh))
        scale = np.array((ow / w, oh / h))
        # rescale baselines
        baselines = [(np.array(bl) * scale).astype('int').tolist()
                     for bl in baselines]
        # rescale suppl_obj
        if suppl_obj is not None:
            suppl_obj = [(np.array(bl) * scale).astype('int').tolist()
                         for bl in suppl_obj]

    if im_feats is None:
        bounds = np.array(im.size, dtype=float) - 1
        im = np.array(im.convert('L'))
        # compute image gradient
        im_feats = gaussian_filter(sobel(im), 0.5)
    else:
        bounds = np.array(im_feats.shape[::-1], dtype=float) - 1

    polygons = []
    if suppl_obj is None:
        suppl_obj = []

    for idx, line in enumerate(baselines):
        try:
            end_points = (line[0], line[-1])
            line = geom.LineString(line)
            offset = default_specs.SEGMENTATION_HYPER_PARAMS[
                'line_width'] if topline is not None else 0
            offset_line = line.parallel_offset(
                offset, side='left' if topline else 'right')
            # Read the coordinate sequences explicitly: shapely geometries no
            # longer convert to arrays implicitly in Shapely 2.
            line = np.array(line.coords, dtype=float)
            offset_line = np.array(offset_line.coords, dtype=float)

            # parallel_offset on the right reverses the coordinate order
            if not topline:
                offset_line = offset_line[::-1]

            # calculate magnitude-weighted average direction vector
            lengths = np.linalg.norm(np.diff(line.T), axis=0)
            p_dir = np.mean(np.diff(line.T) * lengths / lengths.sum(), axis=1)
            p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))

            env_up, env_bottom = _calc_roi(
                line, bounds, baselines[:idx] + baselines[idx + 1:],
                suppl_obj, p_dir)

            polygons.append(
                _extract_patch(env_up, env_bottom, line.astype('int'),
                               offset_line.astype('int'), end_points, p_dir,
                               topline, offset, im_feats))
        except Exception as e:
            # best effort: a failing line yields `None` instead of aborting
            # the whole page
            logger.warning(f'Polygonizer failed on line {idx}: {e}')
            polygons.append(None)

    # map the polygons back into the coordinate system of the unscaled input
    if rescaled:
        polygons = [
            (np.array(pol) / scale).astype('uint').tolist()
            if pol is not None else None for pol in polygons
        ]
    return polygons