def __collect_info(self):
    """
    Reads the skeleton embedding from the layer that produced the coords tensor and derives
    the bounding box configuration and the grid size from it.

    Sets `_embedding_layer`, `_source_embedding`, `_embedding_bounding_box`,
    `_bbox_configuration` and `_grid_size`.

    Raises
    ------
    AssertionError
        If the parent layer of the coords tensor is not a SkeletonEmbeddingLayer.
    """
    parent_layer = self._coords.get_parent_layer()
    self._embedding_layer = parent_layer
    assert isinstance(parent_layer, SkeletonEmbeddingLayer), d_msg(
        self._context,
        "coords tensor's parent layer must be of type SkeletonEmbeddingLayer, "
        f"received parent layer type={type(parent_layer)}, "
        f"parent layer name={parent_layer.get_name()}"
    )

    # get_embedding() hands back a list; keep an ndarray copy for the numeric work below.
    embedding = np.array(parent_layer.get_embedding())
    self._source_embedding = embedding

    # Sanity check on the magnitudes: the embedding is expected to be normalized,
    # i.e. to stay roughly inside [-1, 1]. Anything larger only triggers a notice.
    largest_magnitude = np.max(np.abs(embedding))
    if largest_magnitude > 1.4:
        print(d_msg(
            self._context,
            "It seems the embedding's values are not normalized. This is not an error, but the values should "
            "be centered and lie approximately within the [-1, 1] interval. Received embedding with "
            f"maximum absolute value of {largest_magnitude}"
        ))

    bbox = make_box(embedding)
    self._embedding_bounding_box = bbox
    box_width = bbox[2] - bbox[0]
    box_height = bbox[3] - bbox[1]

    # Express the box relative to the default one, which spans from the
    # top left point [-1, -1] to the bottom right point [1, 1] (side length 2).
    self._bbox_configuration = [box_width / 2.0, box_height / 2.0]

    # Everything between the first and the last axis of the coords shape is the grid.
    self._grid_size = self._coords.get_shape()[1:-1]
def forward(self, x, computation_mode=MakiLayer.INFERENCE_MODE):
    """
    Applies the offsets stored in `x` to the stacked skeleton grid.

    Parameters
    ----------
    x : tensor
        Offsets tensor. Its static shape is unpacked as (batch, h, w, c), and `c` must be equal to
        `embedding_dim * 2`.
    computation_mode : str
        Not used in the body of this method.

    Returns
    -------
    tensor
        The corrected grid, reshaped to [-1, h, w, embedding_dim * 2].

    Raises
    ------
    AssertionError
        If the number of channels of `x` is not `embedding_dim * 2`.
    """
    # No enclosing name_scope here on purpose: it won't be used in the future anyway.
    _, h, w, c = x.get_shape().as_list()
    expected_depth = self._embedding_dim * 2
    assert c == expected_depth, d_msg(
        self.get_name(),
        'The depth of the input tensor must twice as large as the embedding dimensionality. '
        f'Received input tensor channels={c}, embedding dimensionality*2={expected_depth}'
    )

    grid = SkeletonEmbeddingLayer.generate_grid_stacked((w, h), self._embedding)

    with tf.name_scope('GridCorrection'):
        # Multiplying the grid by (w, h) before adding the offsets (and dividing afterwards)
        # is what lets the offsets stay approximately in the range [-1, 1].
        scale = np.array([w, h], dtype='float32')

        def as_points(t):
            # Channels -> explicit (point, xy) pairs, so that `scale` broadcasts over the last axis.
            return tf.reshape(t, shape=[-1, h, w, self._embedding_dim, 2])

        def as_channels(t):
            # (point, xy) pairs -> flat channels, matching the layout of the offsets.
            return tf.reshape(t, shape=[-1, h, w, expected_depth])

        upscaled_grid = as_channels(as_points(grid) * scale)
        corrected_grid = as_points(upscaled_grid + x)
        downscaled_grid = as_channels(corrected_grid / scale)

    return downscaled_grid
def __init__(self, coords: tf.Tensor, point_indicators: tf.Tensor, human_indicators: tf.Tensor, configuration):
    """
    An entity that encapsulates all the tensors necessary to make predictions on a particular grid.
    It makes sure the shapes are synchronized and also collects necessary info for the trainer.

    Parameters
    ----------
    coords : tf.Tensor
        Tensor of the regressed coordinates of the skeleton points. Must lie approximately within
        the [-1, 1] interval.
    point_indicators : tf.Tensor
        Tensor of binary indicators of whether a particular point of the skeleton is visible.
    human_indicators : tf.Tensor
        Tensor of binary indicators of whether a human is present in a particular location of the grid.
    configuration : sequence of length 4
        Unpacked as (h, w, w_scale, h_scale). `[h, w]` is stored as the grid size and
        `[w_scale, h_scale]` as the bounding box configuration.
        NOTE(review): the scales presumably describe the default box size relative to a grid
        cell - confirm against the code that builds the configuration.

    Raises
    ------
    AssertionError
        If `configuration` does not have exactly 4 elements, or if the shapes of the tensors
        are not consistent with each other (see `__check_dimensionality`).
    """
    # The context string is prepended to every diagnostic message produced by this object.
    self._context = f'SSP HeadLabel({coords.name}, {point_indicators.name}, {human_indicators.name}, {configuration})'

    self._coords = coords
    self._point_indicators = point_indicators
    self._human_indicators = human_indicators

    # Report the actual length in the message (the configuration itself is already
    # a part of the context string).
    assert len(configuration) == 4, d_msg(
        self._context,
        f'Configuration must has length=4, received length={len(configuration)}'
    )
    h, w, w_scale, h_scale = configuration
    self._grid_size = [h, w]
    self._bbox_config = [w_scale, h_scale]

    self.__check_dimensionality()
def __check_dimensionality(self):
    """
    Verifies that coords, point indicators and human indicators have mutually consistent shapes.

    The checks, in order:
    1. every tensor is 4-dimensional ([b, h, w, c]);
    2. the spatial parts of the shapes (everything between the first and the last axis) are equal;
    3. coords has an even number of channels, and half of it equals the number of channels
       of point_indicators;
    4. human_indicators has exactly one channel.

    Raises
    ------
    AssertionError
        If any of the checks fails.
    """
    coords_shape = self._coords.get_shape()
    point_indicators_shape = self._point_indicators.get_shape()
    human_indicators_shape = self._human_indicators.get_shape()

    # 1. Only convolutional networks are supported, hence rank 4 everywhere.
    coords_rank = len(coords_shape)
    point_rank = len(point_indicators_shape)
    human_rank = len(human_indicators_shape)
    assert all(rank == 4 for rank in (coords_rank, point_rank, human_rank)), d_msg(
        self._context,
        'Dimensionality of all tensors must be 4, received '
        f'dim(coords)={coords_rank}, '
        f'dim(point_indicators)={point_rank}, '
        f'dim(human_indicators)={human_rank}'
    )

    # 2. Spatial shape (h, w) must agree pairwise.
    coords_hw = coords_shape[1:-1]
    point_hw = point_indicators_shape[1:-1]
    human_hw = human_indicators_shape[1:-1]
    assert (
        coords_hw == point_hw
        and coords_hw == human_hw
        and point_hw == human_hw
    ), d_msg(
        self._context,
        'Spatial shapes are not aligned. Received '
        f'coords_shape={coords_shape}, '
        f'point_indicators_shape={point_indicators_shape}, '
        f'human_indicators_shape={human_indicators_shape}'
    )

    # 3. Each point takes two channels in coords and one channel in point_indicators.
    n_coords = coords_shape[-1]
    assert n_coords % 2 == 0, d_msg(
        self._context,
        f'coords must have an even number of channel, received {n_coords}.'
    )

    n_points = n_coords // 2
    point_channels = point_indicators_shape[-1]
    assert n_points == point_channels, d_msg(
        self._context,
        f'Number of points in coords and point_indicators must be the same, '
        f'received {n_points} and {point_channels}.'
    )

    # 4. A single "human is present" indicator per grid location.
    human_channels = human_indicators_shape[-1]
    assert human_channels == 1, d_msg(
        self._context,
        f'human_indicators tensor must have 1 channel, received {human_channels}.'
    )
def _setup_inference(self):
    """
    Builds the inference tensors out of the outputs of all the heads.

    The output of every head is reshaped from [b, h, w, c] to [b, h * w, c], and the results are
    concatenated along axis 1. The regressed points are additionally reshaped to
    [b, n, c // 2, 2] and mapped from the [-1, 1] grid onto the image: [0, w] x [0, h].

    Sets `_point_indicators_logits`, `_human_indicators_logits`, `_regressed_points`,
    `_point_indicators` and `_human_indicators`.

    Raises
    ------
    AssertionError
        If there is nothing to concatenate (most likely the list of the heads is empty).
    """
    def merge_spatial(t):
        # [b, h, w, c] -> [b, h * w, c]
        b, h, w, c = t.get_shape().as_list()
        return tf.reshape(t, shape=[b, h * w, c])

    # Gather the raw output tensors of every head.
    raw_point_indicators = [head.get_point_indicators().get_data_tensor() for head in self._heads]
    raw_human_indicators = [head.get_human_indicators().get_data_tensor() for head in self._heads]
    raw_coords = [head.get_coords().get_data_tensor() for head in self._heads]

    point_indicators_logits = [merge_spatial(t) for t in raw_point_indicators]
    human_indicators_logits = [merge_spatial(t) for t in raw_human_indicators]
    regressed_points = [merge_spatial(t) for t in raw_coords]

    # An empty list is hard to diagnose from the messages tf produces, so fail early
    # with a readable explanation instead.
    assert point_indicators_logits and human_indicators_logits and regressed_points, d_msg(
        self._name,
        'Length of the logits or regressed points is zero. '
        f'len(point_indicators_logits)={len(point_indicators_logits)}, '
        f'len(human_indicators_logits)={len(human_indicators_logits)}, '
        f'len(regressed_points)={len(regressed_points)}. '
        f'This is probably because the list of the heads is empty.'
    )

    # Join the predictions of all the heads along the "location" axis.
    self._point_indicators_logits = tf.concat(point_indicators_logits, axis=1)
    self._human_indicators_logits = tf.concat(human_indicators_logits, axis=1)
    points = tf.concat(regressed_points, axis=1)

    b, n, c = points.get_shape().as_list()
    w, h = self.get_image_size()

    # Split the channels into (x, y) pairs so that the per-axis constants below broadcast.
    points = tf.reshape(points, shape=[b, n, c // 2, 2])
    half_size = np.array([w / 2, h / 2], dtype='float32')
    # Scale the grid: [-1, 1] -> [-w/2, w/2]
    points = points * half_size
    # Shift the grid: [-w/2, w/2] -> [0, w]
    points = points + half_size
    self._regressed_points = points

    # Probabilities, used in predict.
    self._point_indicators = tf.nn.sigmoid(self._point_indicators_logits)
    self._human_indicators = tf.nn.sigmoid(self._human_indicators_logits)
def __init__(self, embedding_dim: int, name: str, custom_embedding: list = None):
    """
    Creates a trainable skeleton embedding: a variable of shape [n_points, 2] that holds
    the default skeleton. The values are expected to be within the [-1, 1] interval.

    Parameters
    ----------
    embedding_dim : int
        How many points are in the skeleton. Overridden by the length of the `custom_embedding`
        if the latter is provided.
    name : str
        Name of the layer.
    custom_embedding : list of shape [n_points, 2]
        List containing custom skeleton embedding. The values must be centered and normalized
        within the [-1, 1] interval (or approximately so; larger values can be used for
        the purpose of more dense coverage of the grid), because the embedding is put into a grid
        with values within the [-1, 1] interval. If it is not a list, it is converted to one.
        If None, a random embedding is generated.

    Raises
    ------
    AssertionError
        If `embedding_dim` is an int smaller than 2, if `embedding_dim` is not an int and no
        `custom_embedding` is given, if `custom_embedding` has fewer than 2 points, or if its
        first point is not 2-dimensional.
    """
    if isinstance(embedding_dim, int):
        assert embedding_dim >= 2, d_msg(
            name,
            f'embedding_dim must be at least 2. Received embedding_dim={embedding_dim}'
        )
    else:
        assert custom_embedding is not None, d_msg(
            name,
            'embedding_dim is not of the type int. In this case the custom_embedding is expected to be '
            'provided, but the custom_embedding=None.'
        )

    if custom_embedding is None:
        print(d_msg(name, 'No custom embedding is provided. Creating a random one.'))
        skeleton = np.random.uniform(low=-1.0, high=1.0, size=[embedding_dim, 2]).tolist()
        # Pin two points to the opposite corners. The configuration of the default boxes depends
        # heavily on the outcome of the randomization, so forcing the bounding box to be
        # the default one - (1, 1) - keeps the behaviour consistent between different runs.
        skeleton[0] = [-1, -1]
        skeleton[1] = [1, 1]
    else:
        n_points = len(custom_embedding)
        # The custom embedding dictates the dimensionality.
        embedding_dim = n_points
        assert n_points >= 2, d_msg(
            name,
            f'Length of the custom_embedding must be at least 2. Received custom_embedding with '
            f'len={n_points}'
        )
        point_dim = len(custom_embedding[0])
        assert point_dim == 2, d_msg(
            name,
            f"custom_embedding's points are not 2-dimensional. "
            f"Received custom_embedding with {point_dim}-dimensional points."
        )
        if not isinstance(custom_embedding, list):
            print(d_msg(
                name,
                f'custom_embedding is not a list. Received custom_embedding of '
                f'type={type(custom_embedding)}.')
            )
            print(d_msg(
                name,
                'Iterating over the custom_embedding to convert it to a list.')
            )
            custom_embedding = self.__embed2list(custom_embedding)
        skeleton = custom_embedding

    self._embedding_dim = embedding_dim
    self._custom_embedding = skeleton

    initial_value = np.array(skeleton)
    with tf.name_scope(name):
        self._embedding = tf.Variable(initial_value, dtype='float32', name='SkeletonEmbedding')

    # The embedding is the only parameter of the layer and it is not regularized.
    super().__init__(
        name=name,
        params=[self._embedding],
        regularize_params=[],
        named_params_dict={self._embedding.name: self._embedding}
    )