def clip_to_image_transform(width, height):
    r"""
    Affine transform that converts 3D clip space coordinates into 2D image
    space coordinates. Note that the z axis of the clip space coordinates is
    ignored.

    Parameters
    ----------
    width: int
        The width of the image
    height: int
        The height of the image

    Returns
    -------
    :map:`Homogeneous`
        A homogeneous transform that moves clip space coordinates into image
        space.
    """
    # 1. Remove the z axis from the clip space
    rem_z = dims_3to2()
    # 2. Invert the y direction (up becomes down)
    invert_y = Scale([1, -1])
    # 3. [-1, 1] [-1, 1] -> [0, 2] [0, 2]
    t = Translation([1, 1])
    # 4. [0, 2] [0, 2] -> [0, 1] [0, 1]
    unit_scale = Scale(0.5, n_dims=2)
    # 5. [0, 1] [0, 1] -> [0, w - 1] [0, h - 1]
    im_scale = Scale([width - 1, height - 1])
    # 6. [0, w - 1] [0, h - 1] -> [0, h - 1] [0, w - 1] (xy -> yx)
    xy_yx = Homogeneous(
        np.array([[0, 1, 0],
                  [1, 0, 0],
                  [0, 0, 1]], dtype=float))
    # reduce the full transform chain to a single affine matrix
    transforms = [rem_z, invert_y, t, unit_scale, im_scale, xy_yx]
    return reduce(lambda a, b: a.compose_before(b), transforms)
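# A minimal usage sketch (an illustrative example, not part of the original
# API): per the step comments above, the clip-space xy corners should land
# exactly on the image corner pixels. Assumes the menpo imports used by
# clip_to_image_transform are in scope.
import numpy as np

t = clip_to_image_transform(width=640, height=480)
corners_clip = np.array([[-1.0, -1.0, 0.0],   # bottom-left in clip space
                         [1.0, 1.0, 0.0]])    # top-right in clip space
print(t.apply(corners_clip))  # expect [[479., 0.], [0., 639.]]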
def scale_compose_after_inplace_homog_test():
    # can't do this inplace - a Scale cannot represent the composition with
    # a general Homogeneous, so a ValueError is raised (see the pytest
    # variant of this test below)
    homog = Homogeneous(np.array([[0, 1, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]]))
    s = Scale([3, 4])
    assert_raises(ValueError, s.compose_after_inplace, homog)
def per_vertex_occlusion(mesh_in_img, err_proportion=0.0001, render_diag=600):
    [x_r, y_r, z_r] = mesh_in_img.range()
    av_xy_r = (x_r + y_r) / 2.0
    rescale = render_diag / np.sqrt((mesh_in_img.range()[:2] ** 2).sum())
    rescale_z = av_xy_r / z_r
    mesh = Scale([rescale, rescale, rescale * rescale_z]).apply(mesh_in_img)
    mesh.points[...] = mesh.points - mesh.points.min(axis=0)
    mesh.points[:, :2] = mesh.points[:, :2] + 2
    shape = np.around(mesh.points.max(axis=0)[:2] + 2)
    bc, ti = rasterize_barycentric_coordinate_images(mesh, shape)
    si = rasterize_shape_image_from_barycentric_coordinate_images(
        as_colouredtrimesh(mesh), bc, ti)
    # err_proportion=0.01 would be 1% deviation of the total range of the
    # 3D shape
    threshold = render_diag * err_proportion
    xyz_found = si.as_unmasked().sample(mesh.with_dims([0, 1]), order=1).T
    err = np.sum((xyz_found - mesh.points) ** 2, axis=1)
    visible = err < threshold
    return visible
def tcoords_pixel_scaled(self):
    r"""
    Returns a :map:`PointCloud` that is modified to be suitable for directly
    indexing into the pixels of the texture (e.g. for manual mapping
    operations). The resulting tcoords behave just like image landmarks do.

    The operations that are performed are:

    - Flipping the origin from bottom-left to top-left
    - Scaling the tcoords by the image shape (denormalising them)
    - Permuting the axes so that st (or uv) -> yx

    Returns
    -------
    tcoords_scaled : :map:`PointCloud`
        A copy of the tcoords that behave like :map:`Image` landmarks

    Examples
    --------
    Recovering pixel values for every texture coordinate:

    >>> texture = texturedtrimesh.texture
    >>> tc_ps = texturedtrimesh.tcoords_pixel_scaled()
    >>> pixel_values_at_tcs = texture[tc_ps[:, 0], tc_ps[:, 1]]
    """
    scale = Scale(np.array(self.texture.shape)[::-1])
    tcoords = self.tcoords.points.copy()
    # flip the 'y' st 1 -> 0 and 0 -> 1, moving the axis to upper left
    tcoords[:, 1] = 1 - tcoords[:, 1]
    # apply the scale to get the units correct
    tcoords = scale.apply(tcoords)
    # flip axis 0 and axis 1 so indexing is as expected
    tcoords = tcoords[:, ::-1]
    return PointCloud(tcoords)
def per_vertex_occlusion_gl_rasterizer(mesh, err_proportion=0.0001,
                                       err_norm='z', render_width=3000):
    # Render a high-resolution shape image for visibility testing.
    # z scale can be very large for high focal lengths - ensure z is scaled
    # to match x/y for the purposes of masking.
    [x_r, y_r, z_r] = mesh.range()
    av_xy_r = (x_r + y_r) / 2.0
    mesh = Scale([1, 1, av_xy_r / z_r]).apply(mesh)
    model_to_image_transform, shape_image = render_hi_res_shape_image(
        mesh, render_width=render_width)
    # err_proportion=0.01 would be 1% deviation of the total range of the
    # 3D shape
    err_scale = mesh.range()[2] if err_norm == 'z' else np.sqrt(
        (mesh.range() ** 2).sum())
    threshold = err_scale * err_proportion
    sample_points_3d = mesh
    sample_points_2d = model_to_image_transform.apply(sample_points_3d)
    xyz_found = shape_image.as_unmasked().sample(sample_points_2d, order=1).T
    err = np.sum((xyz_found - sample_points_3d.points) ** 2, axis=1)
    return err < threshold
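# Hedged usage sketch: `mesh_in_img` is a hypothetical TriMesh already posed
# in image coordinates (e.g. via a camera projection), and the rasterizer
# dependencies of the function above are assumed to be available.
visible = per_vertex_occlusion_gl_rasterizer(mesh_in_img)
print('{} / {} vertices visible'.format(visible.sum(), mesh_in_img.n_points))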
def shapes(self, as_points=False):
    r"""
    Generates a list containing the shapes obtained at each fitting
    iteration.

    Parameters
    -----------
    as_points: boolean, optional
        Whether the result is returned as a list of PointClouds or
        ndarrays.

        Default: False

    Returns
    -------
    shapes: :class:`menpo.shape.PointCloud` list or ndarray list
        A list containing the shapes obtained at each fitting iteration.
    """
    n = self.n_levels - 1
    shapes = []
    for j, f in enumerate(self.fittings):
        if self.scaled_levels:
            transform = Scale(self.downscale ** (n - j), 2)
            for t in f.shapes(as_points=as_points):
                transform.apply_inplace(t)
                shapes.append(self._affine_correction.apply(t))
        else:
            for t in f.shapes(as_points=as_points):
                shapes.append(self._affine_correction.apply(t))
    return shapes
def shapes(self, as_points=False):
    r"""
    Generates a list containing the shapes obtained at each fitting
    iteration.

    Parameters
    -----------
    as_points: boolean, optional
        Whether the result is returned as a list of PointClouds or
        ndarrays.

        Default: False

    Returns
    -------
    shapes: :class:`menpo.shape.PointCloud` list or ndarray list
        A list containing the shapes obtained at each fitting iteration.
    """
    n = self.n_levels - 1
    shapes = []
    for j, f in enumerate(self.fitting_results):
        if self.scaled_levels:
            transform = Scale(self.downscale ** (n - j), 2)
            for t in f.shapes(as_points=as_points):
                transform.apply_inplace(t)
                shapes.append(self._affine_correction.apply(t))
        else:
            for t in f.shapes(as_points=as_points):
                shapes.append(self._affine_correction.apply(t))
    return shapes
def shapes(self, as_points=False):
    r"""
    Generates a list containing the shapes obtained at each fitting
    iteration.

    Parameters
    -----------
    as_points : `boolean`, optional
        Whether the result is returned as a `list` of :map:`PointCloud`
        or a `list` of `ndarrays`.

    Returns
    -------
    shapes : `list` of :map:`PointCloud` or `list` of `ndarray`
        A list containing the fitted shapes at each iteration of the
        fitting procedure.
    """
    shapes = []
    for j, (alg, s) in enumerate(zip(self.algorithm_results, self.scales)):
        transform = Scale(self.scales[-1] / s, alg.final_shape.n_dims)
        for t in alg.shapes(as_points=as_points):
            t = transform.apply(t)
            shapes.append(self._affine_correction.apply(t))
    return shapes
def test_scale_compose_after_inplace_homog(): # can't do this inplace - so should just give transform chain homog = Homogeneous(np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])) s = Scale([3, 4]) with raises(ValueError): s.compose_after_inplace(homog)
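# For contrast, a non-inplace composition sketch (hedged: relies on menpo
# promoting the mixed-type composition to a general Homogeneous transform):
homog = Homogeneous(np.array([[0, 1, 0],
                              [1, 0, 0],
                              [0, 0, 1]]))
s = Scale([3, 4])
composed = s.compose_after(homog)  # homog is applied first, then s
print(composed.apply(np.array([[1.0, 2.0]])))  # swap then scale: [[6., 4.]]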
def __init__(self, image_shape):
    # flip axis 0 and axis 1 so indexing is as expected
    flip_xy = Homogeneous(np.array([[0, 1, 0],
                                    [1, 0, 0],
                                    [0, 0, 1]]))
    # scale to get the units correct
    scale = Scale(image_shape).pseudoinverse()
    self.flip_and_scale = scale.compose_before(flip_xy)
def _rescale_shapes_to_reference(algorithm_results, scales, affine_correction):
    r"""
    Rescales the shapes from each scale level to the highest scale and maps
    them back onto the original image via ``affine_correction``.
    """
    shapes = []
    for j, (alg, scale) in enumerate(zip(algorithm_results, scales)):
        transform = Scale(scales[-1] / scale, alg.final_shape.n_dims)
        for shape in alg.shapes:
            shape = transform.apply(shape)
            shapes.append(affine_correction.apply(shape))
    return shapes
def _rescale_shapes_to_reference(fitting_results, n_levels, downscale, affine_correction): n = n_levels - 1 shapes = [] for j, f in enumerate(fitting_results): transform = Scale(downscale ** (n - j), f.final_shape.n_dims) for t in f.shapes: t = transform.apply(t) shapes.append(affine_correction.apply(t)) return shapes
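# For concreteness, a self-contained sketch of the per-level factors the
# expression above produces (values illustrative): with three levels and a
# downscale of 2, the coarsest level is upscaled by 4, the finest by 1.
n_levels, downscale = 3, 2
n = n_levels - 1
print([downscale ** (n - j) for j in range(n_levels)])  # [4, 2, 1]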
def chain_compose_before_inplace_chain_test():
    # composing a chain before its own inverse chain should give identity
    t = Translation([3, 4])
    s = Scale([4, 2])
    chain_1 = TransformChain([t, s])
    chain_2 = TransformChain([s.pseudoinverse(), t.pseudoinverse()])
    chain_1.compose_before_inplace(chain_2)

    points = PointCloud(np.random.random([10, 2]))
    chain_res = chain_1.apply(points)
    assert(np.allclose(points.points, chain_res.points))
def chain_compose_before_tps_test(): a = PointCloud(np.random.random([10, 2])) b = PointCloud(np.random.random([10, 2])) tps = ThinPlateSplines(a, b) t = Translation([3, 4]) s = Scale([4, 2]) chain = TransformChain([t, s]) chain_mod = chain.compose_before(tps) points = PointCloud(np.random.random([10, 2])) manual_res = tps.apply(s.apply(t.apply(points))) chain_res = chain_mod.apply(points) assert(np.all(manual_res.points == chain_res.points))
def clip_to_image(height, width):
    # 1. invert the y direction (up becomes down)
    invert_y = Scale([1, -1])
    # 2. [-1, 1] [-1, 1] -> [0, 2] [0, 2]
    t = Translation([1, 1])
    # 3. [0, 2] [0, 2] -> [0, 1] [0, 1]
    unit_scale = Scale(0.5, n_dims=2)
    # 4. [0, 1] [0, 1] -> [0, w] [0, h]
    im_scale = Scale([width, height])
    # 5. [0, w] [0, h] -> [0, h] [0, w] (xy -> yx)
    xy_yx = Homogeneous(
        np.array([[0, 1, 0],
                  [1, 0, 0],
                  [0, 0, 1]], dtype=float))
    # reduce the full transform chain to a single affine matrix
    transforms = [invert_y, t, unit_scale, im_scale, xy_yx]
    return reduce(lambda a, b: a.compose_before(b), transforms)
def test_chain_compose_after_inplace_tps(): a = PointCloud(np.random.random([10, 2])) b = PointCloud(np.random.random([10, 2])) tps = ThinPlateSplines(a, b) t = Translation([3, 4]) s = Scale([4, 2]) chain = TransformChain([t, s]) chain.compose_after_inplace(tps) points = PointCloud(np.random.random([10, 2])) manual_res = s.apply(t.apply(tps.apply(points))) chain_res = chain.apply(points) assert (np.all(manual_res.points == chain_res.points))
def _preprocessing(cls, images, group, label, diagonal_range, interpolator,
                   scaled_levels, n_levels, downscale):
    r"""
    Computes the reference shape, normalizes the size of all images to it
    and builds a (Gaussian or smoothing) pyramid generator per image.
    """
    print(' - Computing reference shape')
    shapes = [i.landmarks[group][label].lms for i in images]
    reference_shape = mean_pointcloud(shapes)

    print(' - Normalizing object size')
    if diagonal_range:
        x, y = reference_shape.range()
        scale = diagonal_range / np.sqrt(x**2 + y**2)
        Scale(scale, reference_shape.n_dims).apply_inplace(reference_shape)
    images = [i.rescale_to_reference_shape(reference_shape, group=group,
                                           label=label,
                                           interpolator=interpolator)
              for i in images]

    print(' - Generating multilevel scale space')
    if scaled_levels:
        generator = [i.gaussian_pyramid(n_levels=n_levels,
                                        downscale=downscale)
                     for i in images]
    else:
        generator = [i.smoothing_pyramid(n_levels=n_levels,
                                         downscale=downscale)
                     for i in images]
    return reference_shape, generator
def crop_image(img, center, scale, res, base=384):
    h = base * scale

    t = Translation(
        [
            res[0] * (-center[0] / h + .5),
            res[1] * (-center[1] / h + .5)
        ]) \
        .compose_after(Scale((res[0] / h, res[1] / h))).pseudoinverse()

    # Upper left point of the original image
    ul = np.floor(t.apply([0, 0])).astype(int)
    # Bottom right point of the original image (ceil before the int cast,
    # otherwise the cast truncates and the ceil is a no-op)
    br = np.ceil(t.apply(res)).astype(int)

    # crop and rescale
    cimg, trans = img.warp_to_shape(br - ul,
                                    Translation(-(br - ul) / 2 +
                                                (br + ul) / 2),
                                    return_transform=True)
    c_scale = np.min(cimg.shape) / np.mean(res)
    new_img = cimg.rescale(1 / c_scale).resize(res)
    return new_img, trans, c_scale
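# Hypothetical invocation (names and values are illustrative only): crop a
# 256 x 256 patch around a point of interest in a menpo Image `img`.
new_img, trans, c_scale = crop_image(img, center=np.array([256., 256.]),
                                     scale=1.0, res=np.array([256, 256]))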
def homog_compose_after_inplace_scale_test(): # this should be fine homog = Homogeneous(np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])) s = Scale([3, 4]) homog.compose_after_inplace(s) assert_allclose(homog.h_matrix, np.array([[0, 4, 0], [3, 0, 0], [0, 0, 1]]))
def compute_reference_shape(shapes, diagonal, verbose=False): r""" Function that computes the reference shape as the mean shape of the provided shapes. Parameters ---------- shapes : `list` of `menpo.shape.PointCloud` The set of shapes from which to build the reference shape. diagonal : `int` or ``None`` If `int`, it ensures that the mean shape is scaled so that the diagonal of the bounding box containing it matches the provided value. If ``None``, then the mean shape is not rescaled. verbose : `bool`, optional If ``True``, then progress information is printed. Returns ------- reference_shape : `menpo.shape.PointCloud` The reference shape. """ # the reference_shape is the mean shape of the images' landmarks if verbose: print_dynamic('- Computing reference shape') reference_shape = mean_pointcloud(shapes) # fix the reference_shape's diagonal length if asked if diagonal: x, y = reference_shape.range() scale = diagonal / np.sqrt(x**2 + y**2) reference_shape = Scale(scale, reference_shape.n_dims).apply(reference_shape) return reference_shape
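# A minimal usage sketch with synthetic shapes (assumes the module-level
# menpo imports used by compute_reference_shape, e.g. mean_pointcloud):
import numpy as np
from menpo.shape import PointCloud

shapes = [PointCloud(np.random.rand(68, 2) * 100) for _ in range(10)]
ref = compute_reference_shape(shapes, diagonal=200)
x, y = ref.range()
print(np.sqrt(x ** 2 + y ** 2))  # ~200.0 by construction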
def _fit(self, images, initial_shape, gt_shapes=None, max_iters=50,
         **kwargs):
    r"""
    Fits the algorithm to the multilevel pyramidal images.

    Parameters
    -----------
    images: :class:`menpo.image.masked.MaskedImage` list
        The images to be fitted.
    initial_shape: :class:`menpo.shape.PointCloud`
        The initial shape from which the fitting will start.
    gt_shapes: :class:`menpo.shape.PointCloud` list, optional
        The original ground truth shapes associated to the multilevel
        images.

        Default: None
    max_iters: int or list, optional
        The maximum number of iterations. If int, then this will be the
        overall maximum number of iterations for all the pyramidal levels.
        If list, then a maximum number of iterations is specified for each
        pyramidal level.

        Default: 50

    Returns
    -------
    algorithm_results: :class:`menpo.fg2015.fittingresult.FittingResult` list
        A list of fitting results, one per pyramidal level, containing the
        state of the whole fitting procedure.
    """
    max_iters = self._prepare_max_iters(max_iters)
    shape = initial_shape
    gt_shape = None
    algorithm_results = []
    for j, (i, alg, it, s) in enumerate(zip(images, self._algorithms,
                                            max_iters, self.scales)):
        if gt_shapes:
            gt_shape = gt_shapes[j]

        algorithm_result = alg.run(i, shape, gt_shape=gt_shape,
                                   max_iters=it, **kwargs)
        algorithm_results.append(algorithm_result)

        shape = algorithm_result.final_shape
        if s != self.scales[-1]:
            Scale(self.scales[j + 1] / s,
                  n_dims=shape.n_dims).apply_inplace(shape)
    return algorithm_results
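# The inter-level hand-off in isolation (a hedged sketch; scales of
# (0.5, 1.0) are illustrative): a shape fitted at scale 0.5 is doubled
# before seeding the fit at full resolution.
import numpy as np
from menpo.shape import PointCloud

shape = PointCloud(np.random.rand(68, 2) * 100)  # result at scale 0.5
Scale(1.0 / 0.5, n_dims=2).apply_inplace(shape)  # seed for scale 1.0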
def model_to_clip_transform(points, xy_scale=0.9, z_scale=0.3):
    r"""
    Produces an Affine Transform which centres and scales 3D points to fit
    into the OpenGL clipping space ([-1, 1], [-1, 1], [1, -1]). This can be
    used to construct an appropriate projection matrix for use in an
    orthographic Rasterizer. Note that the z-axis is flipped as is default
    in OpenGL - as a result this transform converts the right-handed
    coordinate input into a left-handed one.

    Parameters
    ----------
    points: :map:`PointCloud`
        The points that should be adjusted.
    xy_scale: `float` 0-1, optional
        Amount by which the boundary is relaxed so the points are not
        right against the edge. A value of 1 means the extremities of the
        point cloud will be mapped onto [-1, 1] [-1, 1] exactly (no border).
        A value of 0.5 means the points will be mapped into the range
        [-0.5, 0.5].

        Default: 0.9 (map to [-0.9, 0.9])
    z_scale: float 0-1, optional
        Scale factor by which the z-dimension is squeezed. A value of 1
        means the z-range of the points will be mapped to exactly fit in
        [1, -1]. A scale of 0.1 means the z-range is compressed to fit in
        the range [0.1, -0.1].

    Returns
    -------
    :map:`Affine`
        The affine transform that creates this mapping
    """
    # 1. Centre the points on the origin
    center = Translation(points.centre_of_bounds()).pseudoinverse()
    # 2. Scale the points to exactly fit the boundaries
    scale = Scale(points.range() / 2.0)
    # 3. Apply the relaxations requested - note the flip in the z axis!!
    # This is because OpenGL by default evaluates depth as bigger number ==
    # further away. Thus not only do we need to get to clip space ([-1, 1]
    # in all dims) but we must invert the z axis so depth buffering is
    # correctly applied.
    b_scale = NonUniformScale([xy_scale, xy_scale, -z_scale])
    return center.compose_before(scale.pseudoinverse()).compose_before(b_scale)
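# A quick sanity check (hedged sketch, assuming menpo's PointCloud.bounds):
# after the transform, the xy extents sit at [-0.9, 0.9] and z at
# [-0.3, 0.3] regardless of the input point range.
import numpy as np
from menpo.shape import PointCloud

points = PointCloud(np.random.rand(100, 3) * 50 + 10)
clipped = model_to_clip_transform(points).apply(points)
print(clipped.bounds())  # approx (-0.9, -0.9, -0.3), (0.9, 0.9, 0.3)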
def _rescale_reference_shape(self):
    r"""
    Function that rescales the reference shape w.r.t. the
    ``normalization_diagonal`` parameter.
    """
    if self.normalization_diagonal:
        x, y = self.reference_shape.range()
        scale = self.normalization_diagonal / np.sqrt(x**2 + y**2)
        Scale(scale, self.reference_shape.n_dims).apply_inplace(
            self.reference_shape)
def render_initialization(images, mm, id_indices, exp_indices, template_camera, p, qs, cs, img_index): c_i = cs[img_index] q_i = qs[img_index] i_in_img = instance_for_params(mm, id_indices, exp_indices, template_camera, p, q_i, c_i)['instance_in_img'] [x_r, y_r, z_r] = i_in_img.range() av_xy_r = (x_r + y_r) / 2.0 i_in_img = Scale([1, 1, av_xy_r / z_r]).apply(i_in_img) mesh_in_img_lit = lambertian_shading(as_colouredtrimesh(i_in_img)) return rasterize_mesh(mesh_in_img_lit, images[0].shape).as_unmasked()
def initial_shape(self): r""" The initial shape from which the fitting started. :type: :map:`PointCloud` """ n = self.n_levels - 1 initial_shape = self.fitting_results[0].initial_shape Scale(self.downscale ** n, initial_shape.n_dims).apply_inplace( initial_shape) return self._affine_correction.apply(initial_shape)
def initial_shape(self): r""" Returns the initial shape from which the fitting started. """ n = self.n_levels - 1 initial_shape = self.fitting_results[0].initial_shape if self.scaled_levels: Scale(self.downscale**n, initial_shape.n_dims).apply_inplace(initial_shape) return self._affine_correction.apply(initial_shape)
def _compute_reference_shape(self, images, group, label, verbose): # the reference_shape is the mean shape of the images' landmarks if verbose: print_dynamic('- Computing reference shape') shapes = [i.landmarks[group][label] for i in images] ref_shape = mean_pointcloud(shapes) # fix the reference_shape's diagonal length if specified if self.diagonal: x, y = ref_shape.range() scale = self.diagonal / np.sqrt(x**2 + y**2) Scale(scale, ref_shape.n_dims).apply_inplace(ref_shape) return ref_shape
def render_iteration(mm, id_ind, exp_ind, img_shape, camera, params, img_index, iteration): params_i = params[iteration] c_i = params_i['cs'][img_index] p_i = params_i['p'] q_i = params_i['qs'][img_index] i_in_img = instance_for_params(mm, id_ind, exp_ind, camera, p_i, q_i, c_i)['instance_in_img'] [x_r, y_r, z_r] = i_in_img.range() av_xy_r = (x_r + y_r) / 2.0 i_in_img = Scale([1, 1, av_xy_r / z_r]).apply(i_in_img) mesh_in_img_lit = lambertian_shading(as_colouredtrimesh(i_in_img)) return rasterize_mesh(mesh_in_img_lit, img_shape).as_unmasked()
def flip_aam(aam): # flip reference shape faam = copy.deepcopy(aam) faam.reference_shape = Scale([1, -1]).apply(aam.reference_shape) # flip models for sm, am, ps in zip(faam.shape_models, faam.appearance_models, aam.patch_shape): # flip shape model mean sm.mean_vector = Scale([1, -1]).apply(sm.mean()).points.flatten() # flip appearance model mean img = am.mean() am.mean_vector = img.pixels[:, :, :, :, -1::-1].flatten() # flip shape model components ncomponents, _ = sm._components.shape sc = sm._components.reshape(ncomponents, -1, 2) sc[:, :, 1] *= -1 sm._components = sc.reshape(ncomponents, -1) # flip appearance components ncomponents, _ = am._components.shape am._components = am._components.reshape( (ncomponents, ) + img.pixels.shape)[:, :, :, :, :, -1::-1].reshape( ncomponents, -1) return faam
def prepare_template_reference_space(template):
    r"""Return a copy of the template centred at the origin
    and with max radial distance from centre of 1.

    This means the template is:
      1. fully contained by a bounding sphere of radius 1 at the origin
      2. centred at the origin.

    This isn't necessary, but it's nice to have a meaningful reference
    space for our models.
    """
    max_radial = np.sqrt(
        ((template.points - template.centre()) ** 2).sum(axis=1)).max()
    translation = Translation(-template.centre())
    scale = Scale(1 / max_radial, n_dims=3)
    adjustment = translation.compose_before(scale)
    # apply() returns an adjusted copy - the template itself is untouched
    return adjustment.apply(template)
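# A hedged verification sketch (`template` is any hypothetical menpo
# PointCloud or TriMesh): the result should be centred at the origin with a
# maximum radial distance of ~1.
import numpy as np

centred = prepare_template_reference_space(template)
radii = np.sqrt((centred.points ** 2).sum(axis=1))
print(centred.centre(), radii.max())  # ~origin and ~1.0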
def tcoords_to_image_coords(image_shape): r""" Returns a :map:`Homogeneous` transform that converts [0,1] texture coordinates (tcoords) used on :map:`TexturedTriMesh` instances to image coordinates, which behave just like image landmarks do. The operations that are performed are: - Flipping the origin from bottom-left to top-left - Permuting the axis so that st (or uv) -> yx - Scaling the tcoords by the image shape (denormalising them). Note that (1, 1) has to map to the highest pixel value, which is actually (h - 1, w - 1) due to Menpo being 0-based with image operations. Parameters ---------- image_shape : `tuple` The shape of the texture that the tcoords index in to. Returns ------- :map:`Homogeneous` A transform that, when applied to texture coordinates, converts them to image coordinates. """ # flip the 'y' st 1 -> 0 and 0 -> 1, moving the axis to upper left invert_unit_y = Homogeneous( np.array([[1.0, 0.0, 0.0], [0.0, -1.0, 1.0], [0.0, 0.0, 1.0]]) ) # flip axis 0 and axis 1 so indexing is as expected flip_xy_yx = Homogeneous( np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) ) return invert_unit_y.compose_before(flip_xy_yx).compose_before( Scale(np.array(image_shape) - 1) )
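# Hedged usage sketch (`mesh` is a hypothetical menpo TexturedTriMesh;
# Image.sample interpolates the texture at the mapped points):
tc_to_img = tcoords_to_image_coords(mesh.texture.shape)
tcoords_in_img = tc_to_img.apply(mesh.tcoords)
colours_at_tcoords = mesh.texture.sample(tcoords_in_img)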
def _parse_format(self, asset=None):
    with open(self.filepath, 'r') as f:
        landmarks = f.read()

    # Remove comments and blank lines
    landmarks = [l for l in landmarks.splitlines()
                 if (l.rstrip() and '#' not in l)]

    # Pop the front of the list for the number of landmarks
    count = int(landmarks.pop(0))
    # Pop the last element of the list for the image_name
    image_name = landmarks.pop()

    xs = np.empty([count, 1])
    ys = np.empty([count, 1])
    connectivity = np.empty([count, 2], dtype=int)
    for i in range(count):
        # Though unpacked, they are still all strings
        # Only unpack the first 7
        (path_num, path_type, xpos, ypos,
         point_num, connects_from, connects_to) = landmarks[i].split()[:7]
        xs[i, ...] = float(xpos)
        ys[i, ...] = float(ypos)
        connectivity[i, ...] = [int(connects_from), int(connects_to)]

    points = self._build_points(xs, ys)
    if asset is not None:
        # we've been given an asset. As ASF files are normalized,
        # fix that here
        points = Scale(np.array(asset.shape)).apply(points)

    # TODO: Use connectivity and create a graph type instead of PointCloud
    # edges = scaled_points[connectivity]

    self.pointcloud = PointCloud(points)
    self.labels_to_masks = OrderedDict(
        [('all', np.ones(points.shape[0], dtype=bool))])
def _train_batch(self, image_batch, increment=False, group=None, bounding_box_group_glob=None, verbose=False): # Rescale to existing reference shape image_batch = rescale_images_to_reference_shape( image_batch, group, self.reference_shape, verbose=verbose) generated_bb_func = generate_perturbations_from_gt( image_batch, self.n_perturbations, self._perturb_from_gt_bounding_box, gt_group=group, bb_group_glob=bounding_box_group_glob, verbose=verbose) # for each scale (low --> high) current_shapes = [] for j in range(self.n_scales): if verbose: if len(self.scales) > 1: scale_prefix = ' - Scale {}: '.format(j) else: scale_prefix = ' - ' else: scale_prefix = None # Handle holistic features if j == 0 and self.holistic_features[j] == no_op: # Saves a lot of memory feature_images = image_batch elif j == 0 or self.holistic_features[j] is not self.holistic_features[j - 1]: # Compute features only if this is the first pass through # the loop or the features at this scale are different from # the features at the previous scale feature_images = compute_features(image_batch, self.holistic_features[j], prefix=scale_prefix, verbose=verbose) # handle scales if self.scales[j] != 1: # Scale feature images only if scale is different than 1 scaled_images = scale_images(feature_images, self.scales[j], prefix=scale_prefix, verbose=verbose) else: scaled_images = feature_images # Extract scaled ground truth shapes for current scale scaled_shapes = [i.landmarks[group].lms for i in scaled_images] if j == 0: msg = '{}Aligning reference shape with bounding boxes.'.format( scale_prefix) wrap = partial(print_progress, prefix=msg, end_with_newline=False, verbose=verbose) # Extract perturbations at the very bottom level for ii in wrap(scaled_images): c_shapes = [] for bbox in generated_bb_func(ii): c_s = align_shape_with_bounding_box( self.reference_shape, bbox) c_shapes.append(c_s) current_shapes.append(c_shapes) # train supervised descent algorithm if not increment: current_shapes = self.algorithms[j].train( scaled_images, scaled_shapes, current_shapes, prefix=scale_prefix, verbose=verbose) else: current_shapes = self.algorithms[j].increment( scaled_images, scaled_shapes, current_shapes, prefix=scale_prefix, verbose=verbose) # Scale current shapes to next resolution, don't bother # scaling final level if j != (self.n_scales - 1): transform = Scale(self.scales[j + 1] / self.scales[j], n_dims=2) for image_shapes in current_shapes: for shape in image_shapes: transform.apply_inplace(shape)
def _train(self, original_images, group=None, bounding_box_group_glob=None, verbose=False): r""" """ # Dlib does not support incremental builds, so we must be passed a list if not isinstance(original_images, list): original_images = list(original_images) # We use temporary landmark groups - so we need the group key to not be # None if group is None: group = original_images[0].landmarks.group_labels[0] # Temporarily store all the bounding boxes for rescaling for i in original_images: i.landmarks['__gt_bb'] = i.landmarks[group].lms.bounding_box() if self.reference_shape is None: # If no reference shape was given, use the mean of the first batch self.reference_shape = compute_reference_shape( [i.landmarks['__gt_bb'].lms for i in original_images], self.diagonal, verbose=verbose) # Rescale to existing reference shape images = rescale_images_to_reference_shape( original_images, '__gt_bb', self.reference_shape, verbose=verbose) # Scaling is done - remove temporary gt bounding boxes for i, i2 in zip(original_images, images): del i.landmarks['__gt_bb'] del i2.landmarks['__gt_bb'] generated_bb_func = generate_perturbations_from_gt( images, self.n_perturbations, self._perturb_from_gt_bounding_box, gt_group=group, bb_group_glob=bounding_box_group_glob, verbose=verbose) # for each scale (low --> high) current_bounding_boxes = [] for j in range(self.n_scales): if verbose: if len(self.scales) > 1: scale_prefix = ' - Scale {}: '.format(j) else: scale_prefix = ' - ' else: scale_prefix = None # handle scales if self.scales[j] != 1: # Scale feature images only if scale is different than 1 scaled_images = scale_images(images, self.scales[j], prefix=scale_prefix, verbose=verbose) else: scaled_images = images if j == 0: current_bounding_boxes = [generated_bb_func(im) for im in scaled_images] # Extract scaled ground truth shapes for current scale scaled_gt_shapes = [i.landmarks[group].lms for i in scaled_images] # Train the Dlib model current_bounding_boxes = self.algorithms[j].train( scaled_images, scaled_gt_shapes, current_bounding_boxes, prefix=scale_prefix, verbose=verbose) # Scale current shapes to next resolution, don't bother # scaling final level if j != (self.n_scales - 1): transform = Scale(self.scales[j + 1] / self.scales[j], n_dims=2) for bboxes in current_bounding_boxes: for bb in bboxes: transform.apply_inplace(bb)
def _train_batch(self, template, shape_batch, increment=False, group=None, shape_forgetting_factor=1.0, verbose=False): r""" Builds an Active Template Model from a list of landmarked images. """ # build models at each scale if verbose: print_dynamic('- Building models\n') feature_images = [] # for each scale (low --> high) for j in range(self.n_scales): if verbose: if len(self.scales) > 1: scale_prefix = ' - Scale {}: '.format(j) else: scale_prefix = ' - ' else: scale_prefix = None # Handle features if j == 0 or self.holistic_features[j] is not self.holistic_features[j - 1]: # Compute features only if this is the first pass through # the loop or the features at this scale are different from # the features at the previous scale feature_images = compute_features([template], self.holistic_features[j], prefix=scale_prefix, verbose=verbose) # handle scales if self.scales[j] != 1: # Scale feature images only if scale is different than 1 scaled_images = scale_images(feature_images, self.scales[j], prefix=scale_prefix, verbose=verbose) # Extract potentially rescaled shapes scale_transform = Scale(scale_factor=self.scales[j], n_dims=2) scale_shapes = [scale_transform.apply(s) for s in shape_batch] else: scaled_images = feature_images scale_shapes = shape_batch # Build the shape model if verbose: print_dynamic('{}Building shape model'.format(scale_prefix)) if not increment: if j == 0: shape_model = self._build_shape_model(scale_shapes, j) self.shape_models.append(shape_model) else: self.shape_models.append(deepcopy(shape_model)) else: self._increment_shape_model( scale_shapes, self.shape_models[j], forgetting_factor=shape_forgetting_factor) # Obtain warped images - we use a scaled version of the # reference shape, computed here. This is because the mean # moves when we are incrementing, and we need a consistent # reference frame. scaled_reference_shape = Scale(self.scales[j], n_dims=2).apply( self.reference_shape) warped_template = self._warp_template(scaled_images[0], group, scaled_reference_shape, j, scale_prefix, verbose) self.warped_templates.append(warped_template[0]) if verbose: print_dynamic('{}Done\n'.format(scale_prefix)) # Because we just copy the shape model, we need to wait to trim # it after building each model. This ensures we can have a different # number of components per level for j, sm in enumerate(self.shape_models): max_sc = self.max_shape_components[j] if max_sc is not None: sm.trim_components(max_sc)
def _train_batch( self, template, shape_batch, increment=False, group=None, shape_forgetting_factor=1.0, verbose=False ): r""" Builds an Active Template Model from a list of landmarked images. """ # build models at each scale if verbose: print_dynamic("- Building models\n") feature_images = [] # for each scale (low --> high) for j in range(self.n_scales): if verbose: if len(self.scales) > 1: scale_prefix = " - Scale {}: ".format(j) else: scale_prefix = " - " else: scale_prefix = None # Handle features if j == 0 or self.holistic_features[j] is not self.holistic_features[j - 1]: # Compute features only if this is the first pass through # the loop or the features at this scale are different from # the features at the previous scale feature_images = compute_features( [template], self.holistic_features[j], prefix=scale_prefix, verbose=verbose ) # handle scales if self.scales[j] != 1: # Scale feature images only if scale is different than 1 scaled_images = scale_images(feature_images, self.scales[j], prefix=scale_prefix, verbose=verbose) # Extract potentially rescaled shapes scale_transform = Scale(scale_factor=self.scales[j], n_dims=2) scale_shapes = [scale_transform.apply(s) for s in shape_batch] else: scaled_images = feature_images scale_shapes = shape_batch # Build the shape model if verbose: print_dynamic("{}Building shape model".format(scale_prefix)) if not increment: shape_model = self._build_shape_model(scale_shapes, j) self.shape_models.append(shape_model) else: self._increment_shape_model(scale_shapes, j, forgetting_factor=shape_forgetting_factor) # Obtain warped images - we use a scaled version of the # reference shape, computed here. This is because the mean # moves when we are incrementing, and we need a consistent # reference frame. scaled_reference_shape = Scale(self.scales[j], n_dims=2).apply(self.reference_shape) warped_template = self._warp_template( scaled_images[0], group, scaled_reference_shape, j, scale_prefix, verbose ) self.warped_templates.append(warped_template[0]) if verbose: print_dynamic("{}Done\n".format(scale_prefix))