def predict(self, image: PIL.Image.Image, top_k: int = 3) -> List[InferencerPrediction]:
    """
    Classify an image and return the highest-scoring labels.

    :param image: input image; it is resized to ``self.target_size`` and run
        through the MobileNetV2 ``preprocess_input`` before inference
    :param top_k: maximum number of predictions to return
    :return: at most ``top_k`` predictions, ordered by descending confidence
    """
    # resize the input image and preprocess it
    image = image.resize(self.target_size)
    image = tf.keras.preprocessing.image.img_to_array(image)
    image = tf.keras.applications.mobilenet_v2.preprocess_input(image)
    # add a leading axis so a single image is passed as a batch of one
    image = np.expand_dims(image, axis=0)

    # pass to model
    result = self.classifier.predict(image)

    # squeeze() turns a single-score output into a 0-d array whose tolist()
    # is a bare float; atleast_1d keeps it a list so zip() below still works.
    scores = np.atleast_1d(np.squeeze(result)).tolist()

    ranked = sorted(
        zip(self.labels, scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [
        InferencerPrediction(label=label, confidence=confidence)
        for label, confidence in ranked[:top_k]
    ]
def _resize_image_pair(
    img1: PIL.Image.Image,
    img2: PIL.Image.Image,
    trim1: bool = True,
    trim2: bool = True,
    quantize_distance: int = 15,
) -> typing.Tuple[PIL.Image.Image, PIL.Image.Image]:
    """Optionally trim two images, then resize both to a common size.

    The common size is the larger width and the larger height of the two
    (possibly trimmed) images, so neither image is ever shrunk.

    Args:
        img1: first image.
        img2: second image.
        trim1: whether to trim ``img1`` before resizing.
        trim2: whether to trim ``img2`` before resizing.
        quantize_distance: ``distance`` argument forwarded to
            ``_quantize_color`` before trimming. Defaults to 15, the value
            that used to be hard-coded here.

    Returns:
        A tuple ``(img1, img2)`` of images sharing the same size.
    """

    def _trim_helper(img):
        # Two passes: the inner call trims with a white RGBA border colour,
        # the outer call trims again with _trim_image's default border colour.
        return _trim_image(
            _trim_image(
                _quantize_color(img, distance=quantize_distance),
                border_color=PIL.ImageColor.getcolor("white", "RGBA")))

    img1 = _trim_helper(img1) if trim1 else img1
    img2 = _trim_helper(img2) if trim2 else img2

    w = max(img1.width, img2.width)
    h = max(img1.height, img2.height)
    return img1.resize((w, h)), img2.resize((w, h))
def make_power_2(img: PIL.Image.Image, base: int, interp_method=Image.BICUBIC) -> PIL.Image.Image:
    """Resize ``img`` so that both sides are multiples of ``base``.

    Despite the name, the sides are rounded to the nearest multiple of
    ``base``, not to a power of two.

    Args:
        img: input image.
        base: positive integer both output sides must be divisible by.
        interp_method: resampling filter passed to ``img.resize``.

    Returns:
        ``img`` itself when its size already satisfies the constraint,
        otherwise a resized copy.
    """
    ow, oh = img.size
    # Rounding can yield 0 when a side is at most base / 2; clamp to one full
    # `base` so resize() never receives an empty dimension.
    h = max(base, int(np.round(oh / base) * base))
    w = max(base, int(np.round(ow / base) * base))
    if (h == oh) and (w == ow):
        return img
    return img.resize((w, h), interp_method)
def preprocess_pil_image(self, pil_image: PIL.Image.Image):
    """Turn a PIL image into a preprocessed single-image batch for the network."""
    # The image must be in RGB mode; convert only when it is not already.
    rgb_image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")

    # Resize to the input size configured on this instance.
    target_size = (self.image_size_x, self.image_size_y)
    resized = rgb_image.resize(target_size)

    # PIL image -> NumPy array, then add a leading batch axis.
    pixels = image.img_to_array(resized)
    batch = np.expand_dims(pixels, axis=0)

    # Hand over to the network's own preprocessing function.
    return self.preprocess_input(batch)
def postprocess_image(img: torch.Tensor, target_img: PIL.Image.Image) -> PIL.Image.Image:
    """Histogram-match a network output to a target image and return it as PIL.

    Args:
        img: tensor of shape ``(1, 3, H, W)``.
        target_img: image supplying the reference histogram and the final
            output size.

    Returns:
        The histogram-matched result, resized to ``target_img``'s original size.

    Raises:
        AssertionError: if ``img`` is not a single 3-channel image or
            ``target_img`` is not a PIL image.
    """
    assert img.shape[0] == 1 and img.shape[1] == 3
    assert isinstance(target_img, PIL.Image.Image)

    # bring the target image to the tensor's spatial size (PIL sizes are (W, H))
    source_size = (img.shape[3], img.shape[2])
    target_size = target_img.size
    target_img = target_img.resize(source_size, resample=PIL.Image.LANCZOS)

    # convert both source and target to numpy
    target_img_numpy = np.array(target_img)
    # detach()/cpu() make .numpy() valid for tensors that track gradients or
    # live on another device. Indexing the batch axis (rather than squeeze())
    # keeps H and W even when one of them is 1. The final slice reverses the
    # channel order.
    img_numpy = img.detach().cpu().numpy()[0].transpose(1, 2, 0)[:, :, ::-1]

    result = histogram_matching(img_numpy, target_img_numpy)
    result_pil = PIL.Image.fromarray(result.astype(np.uint8))
    return result_pil.resize(target_size, resample=PIL.Image.LANCZOS)
def resize_keep_aspect_ratio(
    image: PIL.Image.Image, target_width: int, target_height: int
) -> (PIL.Image.Image, str):
    """
    Scale an image uniformly so that one side matches its target exactly.

    The larger of the two per-axis scale factors is applied to both axes, so
    the aspect ratio is preserved and the other side ends up at least as large
    as its target.

    :param image: original image
    :param target_width: the width we want
    :param target_height: the height we want
    :return: a tuple of the resized image and ``"width"`` or ``"height"``,
        naming the side that was matched to its target
    """
    src_width, src_height = image.width, image.height
    scale_w = target_width / src_width
    scale_h = target_height / src_height

    if scale_w > scale_h:
        new_size = (target_width, round(scale_w * src_height))
        resized_by = "width"
    else:
        new_size = (round(scale_h * src_width), target_height)
        resized_by = "height"

    return image.resize(new_size, Image.LANCZOS), resized_by
def preprocess_image(
    image: PIL.Image.Image,
    new_size: int = 256,
    mean: np.ndarray = np.array([0.40760392, 0.45795686, 0.48501961])
) -> torch.Tensor:
    """Convert a PIL image into a mean-centred BGR tensor of shape (1, 3, N, N).

    Args:
        image: input image; non-RGB modes are converted to RGB first.
        new_size: side length ``N`` of the square the image is resized to.
        mean: per-channel mean on a 0..1 scale, subtracted after the channels
            have been swapped to BGR. It is only read, never modified.

    Returns:
        A float32 tensor holding the mean-centred image rescaled by 255.

    Raises:
        AssertionError: if ``image`` is not a PIL image.
    """
    assert isinstance(image, PIL.Image.Image)

    # The three-way split() below needs exactly three bands; grayscale,
    # palette and RGBA inputs would otherwise fail to unpack.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # use PIL here because it resamples properly
    # (https://twitter.com/jaakkolehtinen/status/1258102168176951299)
    image = image.resize((new_size, new_size), resample=PIL.Image.LANCZOS)

    # RGB to BGR
    r, g, b = image.split()
    image_bgr = PIL.Image.merge('RGB', (b, g, r))

    # normalization: subtract the mean on a 0..1 scale, then return to 0..255
    image_numpy = np.array(image_bgr, dtype=np.float32) / 255.0
    image_numpy -= mean
    image_numpy *= 255.0

    # [H, W, C] -> [N, C, H, W]
    image_numpy = np.transpose(image_numpy, (2, 0, 1))[None, :, :, :]
    return torch.from_numpy(image_numpy).to(torch.float32)
def resize_img(img: PIL.Image.Image, size: list) -> PIL.Image.Image:
    """Return ``img`` resized to ``size`` (width, height) with Lanczos resampling.

    ``Image.ANTIALIAS`` was an alias of ``Image.LANCZOS`` and was removed in
    Pillow 10, so the filter is named directly. ``size`` is converted to a
    tuple because callers are documented to pass a list.
    """
    return img.resize(size=tuple(size), resample=Image.LANCZOS)
def _preprocess(query: PIL.Image.Image):
    """Resize an image to 416x416 and return it as a float32 batch of one.

    The array's third axis is moved to the front and a leading batch axis is
    added (presumably HWC -> NCHW; confirm against the consuming model).
    """
    resized = query.resize((416, 416))
    pixels = np.array(resized).astype(np.float32)
    # move axis 2 to position 0
    channels_first = np.moveaxis(pixels, 2, 0)
    # prepend the batch axis
    return channels_first[np.newaxis, ...]
def calculate_polygonal_environment(im: PIL.Image.Image = None,
                                    baselines: Sequence[Sequence[Tuple[int, int]]] = None,
                                    suppl_obj: Sequence[Sequence[Tuple[int, int]]] = None,
                                    im_feats: np.ndarray = None,
                                    scale: Tuple[int, int] = None):
    """
    Given a list of baselines and an input image, calculates a polygonal
    environment around each baseline.

    Args:
        im (PIL.Image): grayscale input image (mode 'L')
        baselines (sequence): List of lists containing a single baseline per
                              entry.
        suppl_obj (sequence): List of lists containing additional polylines
                              that should be considered hard boundaries for
                              polygonization purposes. Can be used to prevent
                              polygonization into non-text areas such as
                              illustrations or to compute the polygonization of
                              a subset of the lines in an image.
        im_feats (numpy.array): An optional precomputed seamcarve energy map.
                                Overrides data in `im`. The default map is
                                `gaussian_filter(sobel(im), 0.5)`.
        scale (tuple): A 2-tuple (h, w) containing optional scale factors of
                       the input. Values of 0 are used for aspect-preserving
                       scaling. `None` (or both values 0) skips input scaling.

    Returns:
        List of lists of coordinates. If no polygonization could be computed
        for a baseline `None` is returned instead.
    """
    # Per-axis (x, y) factors actually applied to the input. Stays None when
    # no rescaling happened, so the final un-scaling step is skipped instead
    # of dividing by a raw (0, 0) `scale` argument.
    scale_factors = None
    if scale is not None and (scale[0] > 0 or scale[1] > 0):
        w, h = im.size
        oh, ow = scale
        if oh == 0:
            oh = int(h * ow / w)
        elif ow == 0:
            ow = int(w * oh / h)
        im = im.resize((ow, oh))
        scale_factors = np.array((ow / w, oh / h))
        # rescale baselines
        baselines = [(np.array(bl) * scale_factors).astype('int').tolist() for bl in baselines]
        # rescale suppl_obj
        if suppl_obj is not None:
            suppl_obj = [(np.array(bl) * scale_factors).astype('int').tolist() for bl in suppl_obj]

    # `np.float` was removed from NumPy; the builtin `float` is the same dtype.
    if im_feats is None:
        bounds = np.array(im.size, dtype=float) - 1
        im = np.array(im.convert('L'))
        # compute image gradient
        im_feats = gaussian_filter(sobel(im), 0.5)
    else:
        bounds = np.array(im_feats.shape[::-1], dtype=float) - 1

    def _ray_intersect_boundaries(ray, direction, aabb):
        """
        Simplified version of [0] for 2d and AABB anchored at (0,0).

        [0] http://gamedev.stackexchange.com/questions/18436/most-efficient-aabb-vs-ray-collision-algorithms
        """
        dir_fraction = np.empty(2, dtype=ray.dtype)
        dir_fraction[direction == 0.0] = np.inf
        dir_fraction[direction != 0.0] = np.divide(1.0, direction[direction != 0.0])

        t1 = (-ray[0]) * dir_fraction[0]
        t2 = (aabb[0] - ray[0]) * dir_fraction[0]
        t3 = (-ray[1]) * dir_fraction[1]
        t4 = (aabb[1] - ray[1]) * dir_fraction[1]

        tmin = max(min(t1, t2), min(t3, t4))
        tmax = min(max(t1, t2), max(t3, t4))

        # nearest non-negative hit along the ray
        t = min(x for x in [tmin, tmax] if x >= 0)
        return ray + (direction * t)

    def _calc_seam(baseline, polygon, angle, bias=150):
        """
        Calculates seam between baseline and ROI boundary on one side.

        Adds a baseline-distance-weighted bias to the feature map, masks
        out the bounding polygon and rotates the line so it is roughly
        level.
        """
        MASK_VAL = 99999
        r, c = draw.polygon(polygon[:, 1], polygon[:, 0])
        c_min, c_max = int(polygon[:, 0].min()), int(polygon[:, 0].max())
        r_min, r_max = int(polygon[:, 1].min()), int(polygon[:, 1].max())
        patch = im_feats[r_min:r_max + 2, c_min:c_max + 2].copy()
        # bias feature matrix by distance from baseline
        mask = np.ones_like(patch)
        for l in zip(baseline[:-1] - (c_min, r_min), baseline[1:] - (c_min, r_min)):
            line_locs = draw.line(l[0][1], l[0][0], l[1][1], l[1][0])
            mask[line_locs] = 0
        dist_bias = distance_transform_cdt(mask)
        # absolute mask (`np.bool` was removed from NumPy; use builtin `bool`)
        mask = np.ones_like(patch, dtype=bool)
        mask[r - r_min, c - c_min] = False
        # combine weights with features
        patch[mask] = MASK_VAL
        patch += (dist_bias * (np.mean(patch[patch != MASK_VAL]) / bias))
        extrema = baseline[(0, -1), :] - (c_min, r_min)
        # scale line image to max 600 pixel width
        scale = min(1.0, 600 / (c_max - c_min))
        tform, rotated_patch = _rotate(patch,
                                       angle,
                                       center=extrema[0],
                                       scale=scale,
                                       cval=MASK_VAL)
        # ensure to cut off padding after rotation
        x_offsets = np.sort(
            np.around(tform.inverse(extrema)[:, 0]).astype('int'))
        rotated_patch = rotated_patch[:, x_offsets[0]:x_offsets[1] + 1]
        # infinity pad for seamcarve
        rotated_patch = np.pad(rotated_patch, ((1, 1), (0, 0)),
                               mode='constant',
                               constant_values=np.inf)
        r, c = rotated_patch.shape
        # fold into shape (c, r-2 3)
        A = np.lib.stride_tricks.as_strided(
            rotated_patch, (c, r - 2, 3),
            (rotated_patch.strides[1], rotated_patch.strides[0],
             rotated_patch.strides[0]))
        B = rotated_patch[1:-1, 1:].swapaxes(0, 1)
        backtrack = np.zeros_like(B, dtype='int')
        T = np.empty((B.shape[1]), 'f')
        R = np.arange(-1, len(T) - 1)
        for i in np.arange(c - 1):
            A[i].min(1, T)
            backtrack[i] = A[i].argmin(1) + R
            B[i] += T
        # backtrack
        seam = []
        j = np.argmin(rotated_patch[1:-1, -1])
        for i in range(c - 2, -2, -1):
            seam.append((i + x_offsets[0] + 1, j))
            j = backtrack[i, j]
        seam = np.array(seam)[::-1]
        # clamp the seam to one standard deviation around its mean row
        seam_mean = seam[:, 1].mean()
        seam_std = seam[:, 1].std()
        seam[:, 1] = np.clip(seam[:, 1], seam_mean - seam_std,
                             seam_mean + seam_std)
        # rotate back
        seam = tform(seam).astype('int')
        # filter out seam points in masked area of original patch/in padding
        seam = seam[seam.min(axis=1) >= 0, :]
        m = (seam < mask.shape[::-1]).T
        seam = seam[np.logical_and(m[0], m[1]), :]
        seam = seam[np.invert(mask[seam.T[1], seam.T[0]])]
        seam += (c_min, r_min)
        return seam

    def _extract_patch(env_up, env_bottom, baseline, dir_vec):
        """
        Calculate a line image patch from a ROI and the original baseline.
        """
        upper_polygon = np.concatenate((baseline, env_up[::-1]))
        bottom_polygon = np.concatenate((baseline, env_bottom[::-1]))
        angle = np.arctan2(dir_vec[1], dir_vec[0])

        upper_seam = _calc_seam(baseline, upper_polygon, angle)
        bottom_seam = _calc_seam(baseline, bottom_polygon, angle)

        polygon = np.concatenate(
            ([baseline[0]], upper_seam.astype('int'), [baseline[-1]],
             bottom_seam.astype('int')[::-1]))
        return approximate_polygon(polygon, 3).tolist()

    polygons = []
    if suppl_obj is None:
        suppl_obj = []

    for idx, line in enumerate(baselines):
        try:
            # find intercepts with image bounds on each side of baseline
            line = np.array(line, dtype=float)
            # calculate magnitude-weighted average direction vector
            lengths = np.linalg.norm(np.diff(line.T), axis=0)
            p_dir = np.mean(np.diff(line.T) * lengths / lengths.sum(), axis=1)
            p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))
            # interpolate baseline
            ls = geom.LineString(line)
            ip_line = [line[0]]
            dist = 10
            while dist < ls.length:
                ip_line.append(np.array(ls.interpolate(dist)))
                dist += 10
            ip_line.append(line[-1])
            ip_line = np.array(ip_line)
            upper_bounds_intersects = []
            bottom_bounds_intersects = []
            for point in ip_line:
                upper_bounds_intersects.append(
                    _ray_intersect_boundaries(point,
                                              (p_dir * (-1, 1))[::-1],
                                              bounds + 1).astype('int'))
                bottom_bounds_intersects.append(
                    _ray_intersect_boundaries(point,
                                              (p_dir * (1, -1))[::-1],
                                              bounds + 1).astype('int'))
            # build polygon between baseline and bbox intersects
            upper_polygon = geom.Polygon(ip_line.tolist() + upper_bounds_intersects)
            bottom_polygon = geom.Polygon(ip_line.tolist() + bottom_bounds_intersects)

            # select baselines at least partially in each polygon
            side_a = [geom.LineString(upper_bounds_intersects)]
            side_b = [geom.LineString(bottom_bounds_intersects)]
            for adj_line in baselines[:idx] + baselines[idx + 1:] + suppl_obj:
                adj_line = geom.LineString(adj_line)
                if upper_polygon.intersects(adj_line):
                    side_a.append(adj_line)
                elif bottom_polygon.intersects(adj_line):
                    side_b.append(adj_line)
            side_a = unary_union(side_a).buffer(1).boundary
            side_b = unary_union(side_b).buffer(1).boundary

            # NOTE(review): `.type`, `len()` and direct iteration over
            # multi-part geometries look like Shapely 1.x idioms -- confirm
            # against the Shapely version this project pins.
            def _find_closest_point(pt, intersects):
                spt = geom.Point(pt)
                if intersects.type == 'MultiPoint':
                    return min([p for p in intersects],
                               key=lambda x: spt.distance(x))
                elif intersects.type == 'Point':
                    return intersects
                elif intersects.type == 'GeometryCollection' and len(
                        intersects) > 0:
                    t = min([p for p in intersects],
                            key=lambda x: spt.distance(x))
                    # compare the geometry *type*; the geometry object itself
                    # never equals the string 'Point'
                    if t.type == 'Point':
                        return t
                    else:
                        return nearest_points(spt, t)[1]
                else:
                    raise Exception(
                        'No intersection with boundaries. Shapely intersection object: {}'
                        .format(intersects.wkt))

            # interpolate baseline
            env_up = []
            env_bottom = []
            # find orthogonal (to linear regression) intersects with adjacent objects to complete roi
            for point, upper_bounds_intersect, bottom_bounds_intersect in zip(
                    ip_line, upper_bounds_intersects,
                    bottom_bounds_intersects):
                upper_limit = _find_closest_point(
                    point,
                    geom.LineString([point, upper_bounds_intersect
                                     ]).intersection(side_a))
                bottom_limit = _find_closest_point(
                    point,
                    geom.LineString([point, bottom_bounds_intersect
                                     ]).intersection(side_b))
                env_up.append(upper_limit.coords[0])
                env_bottom.append(bottom_limit.coords[0])
            env_up = np.array(env_up, dtype='uint')
            env_bottom = np.array(env_bottom, dtype='uint')
            polygons.append(
                _extract_patch(env_up, env_bottom, line.astype('int'), p_dir))
        except Exception as e:
            # best effort: a failing line yields None, the others still run
            logger.warning(f'Polygonizer failed on line {idx}: {e}')
            polygons.append(None)

    # map the polygons back into the coordinate system of the unscaled input
    if scale_factors is not None:
        polygons = [
            (np.array(pol) / scale_factors).astype('uint').tolist() if pol is not None else None
            for pol in polygons
        ]
    return polygons
def convert_pilimage_to_nparray(pil_image: PIL.Image.Image) -> np.ndarray:
    """Return the image as a ``(1, 150, 150, 3)`` array scaled by 1/255.

    Non-RGB images (grayscale, palette, RGBA, ...) are converted to RGB first;
    without that the reshape to three channels below raises ``ValueError``.
    """
    NUM_RGB = 3
    IMG_WIDTH, IMG_HEIGHT = 150, 150
    SCALE = 255
    if pil_image.mode != "RGB":
        pil_image = pil_image.convert("RGB")
    np_image = np.array(pil_image.resize((IMG_WIDTH, IMG_HEIGHT))) / SCALE
    # Arrays built from PIL images are laid out (height, width, channels).
    return np_image.reshape((1, IMG_HEIGHT, IMG_WIDTH, NUM_RGB))
def calculate_polygonal_environment(im: PIL.Image.Image = None,
                                    baselines: Sequence[Sequence[Tuple[int, int]]] = None,
                                    suppl_obj: Sequence[Sequence[Tuple[int, int]]] = None,
                                    im_feats: np.ndarray = None,
                                    scale: Tuple[int, int] = None,
                                    topline: bool = False):
    """
    Given a list of baselines and an input image, calculates a polygonal
    environment around each baseline.

    Args:
        im (PIL.Image): grayscale input image (mode 'L')
        baselines (sequence): List of lists containing a single baseline per
                              entry.
        suppl_obj (sequence): List of lists containing additional polylines
                              that should be considered hard boundaries for
                              polygonization purposes. Can be used to prevent
                              polygonization into non-text areas such as
                              illustrations or to compute the polygonization of
                              a subset of the lines in an image.
        im_feats (numpy.array): An optional precomputed seamcarve energy map.
                                Overrides data in `im`. The default map is
                                `gaussian_filter(sobel(im), 0.5)`.
        scale (tuple): A 2-tuple (h, w) containing optional scale factors of
                       the input. Values of 0 are used for aspect-preserving
                       scaling. `None` (or both values 0) skips input scaling.
        topline (bool): Switch to change default baseline location for offset
                        calculation purposes. If set to False, baselines are
                        assumed to be on the bottom of the text line and will
                        be offset upwards, if set to True, baselines are on the
                        top and will be offset downwards. If set to None, no
                        offset will be applied.

    Returns:
        List of lists of coordinates. If no polygonization could be computed
        for a baseline `None` is returned instead.
    """
    # Per-axis (x, y) factors actually applied to the input. Stays None when
    # no rescaling happened, so the final un-scaling step is skipped instead
    # of dividing by a raw (0, 0) `scale` argument.
    scale_factors = None
    if scale is not None and (scale[0] > 0 or scale[1] > 0):
        w, h = im.size
        oh, ow = scale
        if oh == 0:
            oh = int(h * ow / w)
        elif ow == 0:
            ow = int(w * oh / h)
        im = im.resize((ow, oh))
        scale_factors = np.array((ow / w, oh / h))
        # rescale baselines
        baselines = [(np.array(bl) * scale_factors).astype('int').tolist() for bl in baselines]
        # rescale suppl_obj
        if suppl_obj is not None:
            suppl_obj = [(np.array(bl) * scale_factors).astype('int').tolist() for bl in suppl_obj]

    if im_feats is None:
        bounds = np.array(im.size, dtype=float) - 1
        im = np.array(im.convert('L'))
        # compute image gradient
        im_feats = gaussian_filter(sobel(im), 0.5)
    else:
        bounds = np.array(im_feats.shape[::-1], dtype=float) - 1

    polygons = []
    if suppl_obj is None:
        suppl_obj = []

    for idx, line in enumerate(baselines):
        try:
            end_points = (line[0], line[-1])
            line = geom.LineString(line)
            offset = default_specs.SEGMENTATION_HYPER_PARAMS['line_width'] if topline is not None else 0
            offset_line = line.parallel_offset(offset, side='left' if topline else 'right')
            # Read the vertices through `.coords` rather than relying on the
            # geometry's array interface, which newer Shapely no longer offers.
            line = np.array(line.coords, dtype=float)
            offset_line = np.array(offset_line.coords, dtype=float)

            # parallel_offset on the right reverses the coordinate order
            if not topline:
                offset_line = offset_line[::-1]
            # calculate magnitude-weighted average direction vector
            lengths = np.linalg.norm(np.diff(line.T), axis=0)
            p_dir = np.mean(np.diff(line.T) * lengths / lengths.sum(), axis=1)
            p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))

            env_up, env_bottom = _calc_roi(line,
                                           bounds,
                                           baselines[:idx] + baselines[idx + 1:],
                                           suppl_obj,
                                           p_dir)

            polygons.append(_extract_patch(env_up,
                                           env_bottom,
                                           line.astype('int'),
                                           offset_line.astype('int'),
                                           end_points,
                                           p_dir,
                                           topline,
                                           offset,
                                           im_feats))
        except Exception as e:
            # best effort: a failing line yields None, the others still run
            logger.warning(f'Polygonizer failed on line {idx}: {e}')
            polygons.append(None)

    # map the polygons back into the coordinate system of the unscaled input
    if scale_factors is not None:
        polygons = [
            (np.array(pol) / scale_factors).astype('uint').tolist() if pol is not None else None
            for pol in polygons
        ]
    return polygons