def call(self, x, mask=None):
    '''
    Return an anchor box tensor based on the shape of the input tensor.

    The logic implemented here is identical to the logic in the module `ssd_box_encode_decode_utils.py`.

    Note that this tensor does not participate in any graph computations at runtime. It is being created
    as a constant once during graph creation and is just being output along with the rest of the model
    output during runtime. Because of this, all logic is implemented as Numpy array operations and it is
    sufficient to convert the resulting Numpy array into a Keras tensor at the very end before outputting it.

    Arguments:
        x (tensor): 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
            or `(batch, height, width, channels)` if `dim_ordering = 'tf'`. The input for this layer
            must be the output of the localization predictor layer.
    '''
    # Compute box width and height for each aspect ratio.
    # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
    size = min(self.img_height, self.img_width)
    # Compute the box widths and heights for all aspect ratios.
    wh_list = []
    for ar in self.aspect_ratios:
        if (ar == 1):
            # Compute the regular anchor box for aspect ratio 1.
            box_height = box_width = self.this_scale * size
            wh_list.append((box_width, box_height))
            if self.two_boxes_for_ar1:
                # Compute one slightly larger version using the geometric mean of this scale value and the next.
                box_height = box_width = np.sqrt(self.this_scale * self.next_scale) * size
                wh_list.append((box_width, box_height))
        else:
            box_height = self.this_scale * size / np.sqrt(ar)
            box_width = self.this_scale * size * np.sqrt(ar)
            wh_list.append((box_width, box_height))
    wh_list = np.array(wh_list)

    # We need the shape of the input tensor.
    if K.image_dim_ordering() == 'tf':
        batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape
    else:  # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future.
        batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape

    # Compute the grid of box center points. They are identical for all aspect ratios.

    # Compute the step sizes, i.e. how far apart the anchor box center points will be vertically and horizontally.
    if (self.this_steps is None):
        step_height = self.img_height / feature_map_height
        step_width = self.img_width / feature_map_width
    else:
        if isinstance(self.this_steps, (list, tuple)) and (len(self.this_steps) == 2):
            step_height = self.this_steps[0]
            step_width = self.this_steps[1]
        elif isinstance(self.this_steps, (int, float)):
            step_height = self.this_steps
            step_width = self.this_steps

    # Compute the offsets, i.e. at what pixel values the first anchor box center point will be from the top and from the left of the image.
    if (self.this_offsets is None):
        offset_height = 0.5
        offset_width = 0.5
    else:
        if isinstance(self.this_offsets, (list, tuple)) and (len(self.this_offsets) == 2):
            offset_height = self.this_offsets[0]
            offset_width = self.this_offsets[1]
        elif isinstance(self.this_offsets, (int, float)):
            offset_height = self.this_offsets
            offset_width = self.this_offsets

    # Now that we have the offsets and step sizes, compute the grid of anchor box center points.
    cy = np.linspace(offset_height * step_height, (offset_height + feature_map_height - 1) * step_height, feature_map_height)
    cx = np.linspace(offset_width * step_width, (offset_width + feature_map_width - 1) * step_width, feature_map_width)
    cx_grid, cy_grid = np.meshgrid(cx, cy)
    cx_grid = np.expand_dims(cx_grid, -1)  # This is necessary for np.tile() to do what we want further down
    cy_grid = np.expand_dims(cy_grid, -1)  # This is necessary for np.tile() to do what we want further down

    # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
    # where the last dimension will contain `(cx, cy, w, h)`.
    boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))

    boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes))  # Set cx
    boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes))  # Set cy
    boxes_tensor[:, :, :, 2] = wh_list[:, 0]  # Set w
    boxes_tensor[:, :, :, 3] = wh_list[:, 1]  # Set h

    # Convert `(cx, cy, w, h)` to `(xmin, ymin, xmax, ymax)`.
    boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2corners')

    # If `limit_boxes` is enabled, clip the coordinates to lie within the image boundaries.
    if self.limit_boxes:
        x_coords = boxes_tensor[:, :, :, [0, 2]]
        x_coords[x_coords >= self.img_width] = self.img_width - 1
        x_coords[x_coords < 0] = 0
        boxes_tensor[:, :, :, [0, 2]] = x_coords
        y_coords = boxes_tensor[:, :, :, [1, 3]]
        y_coords[y_coords >= self.img_height] = self.img_height - 1
        y_coords[y_coords < 0] = 0
        boxes_tensor[:, :, :, [1, 3]] = y_coords

    # If `normalize_coords` is enabled, normalize the coordinates to be within [0,1].
    if self.normalize_coords:
        boxes_tensor[:, :, :, [0, 2]] /= self.img_width
        boxes_tensor[:, :, :, [1, 3]] /= self.img_height

    # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
    if self.coords == 'centroids':
        # Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
        boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2centroids')
    elif self.coords == 'minmax':
        # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
        boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2minmax')

    # Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
    # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
    variances_tensor = np.zeros_like(boxes_tensor)  # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
    variances_tensor += self.variances  # Long live broadcasting
    # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`.
    boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)

    # Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along that dimension.
    # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`.
    boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
    boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1))

    return boxes_tensor
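# --- Illustration (not part of the layer) ---------------------------------------------
# A minimal standalone sketch of how the per-aspect-ratio box shapes and the center-point
# grid above fit together. All values here (image size, scales, aspect ratios, feature map
# size) are made up for illustration only.
import numpy as np

img_height, img_width = 300, 300
feature_map_height, feature_map_width = 4, 4
this_scale, next_scale = 0.2, 0.34
aspect_ratios = [0.5, 1.0, 2.0]
two_boxes_for_ar1 = True

size = min(img_height, img_width)
wh_list = []
for ar in aspect_ratios:
    if ar == 1:
        wh_list.append((this_scale * size, this_scale * size))
        if two_boxes_for_ar1:
            s = np.sqrt(this_scale * next_scale) * size  # geometric mean of the two scales
            wh_list.append((s, s))
    else:
        wh_list.append((this_scale * size * np.sqrt(ar),
                        this_scale * size / np.sqrt(ar)))
wh_list = np.array(wh_list)
n_boxes = len(wh_list)

# With no explicit steps/offsets, the centers sit in the middle of each feature map cell.
step_height = img_height / feature_map_height
step_width = img_width / feature_map_width
cy = np.linspace(0.5 * step_height, (0.5 + feature_map_height - 1) * step_height, feature_map_height)
cx = np.linspace(0.5 * step_width, (0.5 + feature_map_width - 1) * step_width, feature_map_width)
cx_grid, cy_grid = np.meshgrid(cx, cy)

boxes = np.zeros((feature_map_height, feature_map_width, n_boxes, 4))
boxes[..., 0] = np.tile(np.expand_dims(cx_grid, -1), (1, 1, n_boxes))  # cx
boxes[..., 1] = np.tile(np.expand_dims(cy_grid, -1), (1, 1, n_boxes))  # cy
boxes[..., 2] = wh_list[:, 0]  # w
boxes[..., 3] = wh_list[:, 1]  # h

print(boxes.shape)  # (4, 4, 4, 4): a 4x4 grid with 4 boxes per cell and (cx, cy, w, h) each
# ---------------------------------------------------------------------------------------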
def call(self, x, mask=None):
    '''
    Return an anchor box tensor based on the input tensor.

    The logic implemented here is identical to the logic in the module `ssd_box_encode_decode_utils.py`.

    Note that this tensor does not participate in any graph computations at runtime. It is being created
    as a constant once for each classification conv layer during graph creation and is just being output
    along with the rest of the model output during runtime. Because of this, all logic is implemented
    as Numpy array operations and it is sufficient to convert the resulting Numpy array into a Keras
    tensor at the very end before outputting it.
    '''
    # Compute box width and height for each aspect ratio.
    # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
    self.aspect_ratios = np.sort(self.aspect_ratios)
    size = min(self.img_height, self.img_width)
    # Compute the box widths and heights for all aspect ratios.
    wh_list = []
    for ar in self.aspect_ratios:
        if (ar == 1) and self.two_boxes_for_ar1:
            # Compute the regular default box for aspect ratio 1 and...
            w = self.this_scale * size * np.sqrt(ar)
            h = self.this_scale * size / np.sqrt(ar)
            wh_list.append((w, h))
            # ...also compute one slightly larger version using the geometric mean of this scale value and the next.
            w = np.sqrt(self.this_scale * self.next_scale) * size * np.sqrt(ar)
            h = np.sqrt(self.this_scale * self.next_scale) * size / np.sqrt(ar)
            wh_list.append((w, h))
        else:
            w = self.this_scale * size * np.sqrt(ar)
            h = self.this_scale * size / np.sqrt(ar)
            wh_list.append((w, h))
    wh_list = np.array(wh_list)

    # We need the shape of the input tensor.
    if K.image_dim_ordering() == 'tf':
        batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape
    else:  # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future.
        batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape

    # Compute the grid of box center points. They are identical for all aspect ratios.
    cell_height = self.img_height / feature_map_height
    cell_width = self.img_width / feature_map_width
    cx = np.linspace(cell_width / 2, self.img_width - cell_width / 2, feature_map_width)
    cy = np.linspace(cell_height / 2, self.img_height - cell_height / 2, feature_map_height)
    cx_grid, cy_grid = np.meshgrid(cx, cy)
    cx_grid = np.expand_dims(cx_grid, -1)  # This is necessary for np.tile() to do what we want further down
    cy_grid = np.expand_dims(cy_grid, -1)  # This is necessary for np.tile() to do what we want further down

    # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
    # where the last dimension will contain `(cx, cy, w, h)`.
    boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))

    boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes))  # Set cx
    boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes))  # Set cy
    boxes_tensor[:, :, :, 2] = wh_list[:, 0]  # Set w
    boxes_tensor[:, :, :, 3] = wh_list[:, 1]  # Set h

    # Convert `(cx, cy, w, h)` to `(xmin, xmax, ymin, ymax)`.
    boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2minmax')

    # If `limit_boxes` is enabled, clip the coordinates to lie within the image boundaries.
    if self.limit_boxes:
        x_coords = boxes_tensor[:, :, :, [0, 1]]
        x_coords[x_coords >= self.img_width] = self.img_width - 1
        x_coords[x_coords < 0] = 0
        boxes_tensor[:, :, :, [0, 1]] = x_coords
        y_coords = boxes_tensor[:, :, :, [2, 3]]
        y_coords[y_coords >= self.img_height] = self.img_height - 1
        y_coords[y_coords < 0] = 0
        boxes_tensor[:, :, :, [2, 3]] = y_coords

    # If `normalize_coords` is enabled, normalize the coordinates to be within [0,1].
    if self.normalize_coords:
        boxes_tensor[:, :, :, :2] /= self.img_width
        boxes_tensor[:, :, :, 2:] /= self.img_height

    if self.coords == 'centroids':
        # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
        # Convert `(xmin, xmax, ymin, ymax)` back to `(cx, cy, w, h)`.
        boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='minmax2centroids')

    # Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
    # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
    variances_tensor = np.zeros_like(boxes_tensor)  # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
    variances_tensor += self.variances  # Long live broadcasting
    # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`.
    boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)

    # Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along that dimension.
    # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`.
    boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
    boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1))

    return boxes_tensor
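# --- Illustration (not part of the layer) ---------------------------------------------
# `convert_coordinates` is imported from `ssd_box_encode_decode_utils.py` and is not shown
# in this file. The following is only a rough sketch of the two conversions used above,
# assuming, as the comments state, that 'minmax' order is `(xmin, xmax, ymin, ymax)` and
# 'centroids' order is `(cx, cy, w, h)`; the real helper may differ in details.
import numpy as np

def convert_coordinates_sketch(tensor, start_index, conversion):
    ind = start_index
    out = np.copy(tensor)
    if conversion == 'centroids2minmax':
        out[..., ind]     = tensor[..., ind] - tensor[..., ind + 2] / 2.0      # xmin = cx - w/2
        out[..., ind + 1] = tensor[..., ind] + tensor[..., ind + 2] / 2.0      # xmax = cx + w/2
        out[..., ind + 2] = tensor[..., ind + 1] - tensor[..., ind + 3] / 2.0  # ymin = cy - h/2
        out[..., ind + 3] = tensor[..., ind + 1] + tensor[..., ind + 3] / 2.0  # ymax = cy + h/2
    elif conversion == 'minmax2centroids':
        out[..., ind]     = (tensor[..., ind] + tensor[..., ind + 1]) / 2.0      # cx
        out[..., ind + 1] = (tensor[..., ind + 2] + tensor[..., ind + 3]) / 2.0  # cy
        out[..., ind + 2] = tensor[..., ind + 1] - tensor[..., ind]              # w = xmax - xmin
        out[..., ind + 3] = tensor[..., ind + 3] - tensor[..., ind + 2]          # h = ymax - ymin
    else:
        raise ValueError("Unsupported conversion: {}".format(conversion))
    return out
# ---------------------------------------------------------------------------------------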
def call(self, x, mask=None):
    '''
    Return an anchor box tensor based on the shape of the input tensor.

    The logic implemented here is identical to the logic in the module `ssd_box_encode_decode_utils.py`.

    Note that this tensor does not participate in any graph computations at runtime. It is being created
    as a constant once during graph creation and is just being output along with the rest of the model
    output during runtime. Because of this, all logic is implemented as Numpy array operations and it is
    sufficient to convert the resulting Numpy array into a Keras tensor at the very end before outputting it.

    Arguments:
        x (tensor): 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
            or `(batch, height, width, channels)` if `dim_ordering = 'tf'`. The input for this layer
            must be the output of the localization predictor layer.
    '''
    # Compute box width and height for each aspect ratio.
    # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
    self.aspect_ratios = np.sort(self.aspect_ratios)
    size = min(self.img_height, self.img_width)
    # Compute the box widths and heights for all aspect ratios.
    wh_list = []
    for ar in self.aspect_ratios:
        if (ar == 1) and self.two_boxes_for_ar1:
            # Compute the regular default box for aspect ratio 1 and...
            w = self.this_scale * size * np.sqrt(ar)
            h = self.this_scale * size / np.sqrt(ar)
            wh_list.append((w, h))
            # ...also compute one slightly larger version using the geometric mean of this scale value and the next.
            w = np.sqrt(self.this_scale * self.next_scale) * size * np.sqrt(ar)
            h = np.sqrt(self.this_scale * self.next_scale) * size / np.sqrt(ar)
            wh_list.append((w, h))
        else:
            w = self.this_scale * size * np.sqrt(ar)
            h = self.this_scale * size / np.sqrt(ar)
            wh_list.append((w, h))
    wh_list = np.array(wh_list)

    # We need the shape of the input tensor.
    if K.image_dim_ordering() == 'tf':
        batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape
    else:  # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future.
        batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape

    # Compute the grid of box center points. They are identical for all aspect ratios.
    cell_height = self.img_height / feature_map_height
    cell_width = self.img_width / feature_map_width
    cx = np.linspace(cell_width / 2, self.img_width - cell_width / 2, feature_map_width)
    cy = np.linspace(cell_height / 2, self.img_height - cell_height / 2, feature_map_height)
    cx_grid, cy_grid = np.meshgrid(cx, cy)
    cx_grid = np.expand_dims(cx_grid, -1)  # This is necessary for np.tile() to do what we want further down
    cy_grid = np.expand_dims(cy_grid, -1)  # This is necessary for np.tile() to do what we want further down

    # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
    # where the last dimension will contain `(cx, cy, w, h)`.
    boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))

    boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes))  # Set cx
    boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes))  # Set cy
    boxes_tensor[:, :, :, 2] = wh_list[:, 0]  # Set w
    boxes_tensor[:, :, :, 3] = wh_list[:, 1]  # Set h

    # Convert `(cx, cy, w, h)` to `(xmin, xmax, ymin, ymax)`.
    boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2minmax')

    # If `limit_boxes` is enabled, clip the coordinates to lie within the image boundaries.
    if self.limit_boxes:
        x_coords = boxes_tensor[:, :, :, [0, 1]]
        x_coords[x_coords >= self.img_width] = self.img_width - 1
        x_coords[x_coords < 0] = 0
        boxes_tensor[:, :, :, [0, 1]] = x_coords
        y_coords = boxes_tensor[:, :, :, [2, 3]]
        y_coords[y_coords >= self.img_height] = self.img_height - 1
        y_coords[y_coords < 0] = 0
        boxes_tensor[:, :, :, [2, 3]] = y_coords

    # If `normalize_coords` is enabled, normalize the coordinates to be within [0,1].
    if self.normalize_coords:
        boxes_tensor[:, :, :, :2] /= self.img_width
        boxes_tensor[:, :, :, 2:] /= self.img_height

    if self.coords == 'centroids':
        # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
        # Convert `(xmin, xmax, ymin, ymax)` back to `(cx, cy, w, h)`.
        boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='minmax2centroids')

    # Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
    # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
    variances_tensor = np.zeros_like(boxes_tensor)  # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
    variances_tensor += self.variances  # Long live broadcasting
    # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`.
    boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)

    # Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along that dimension.
    # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`.
    boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
    boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1))

    return boxes_tensor
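# --- Usage sketch (hypothetical, not part of the layer) --------------------------------
# How a layer built around this `call()` is typically wired into an SSD model. The class
# name `AnchorBoxes`, its module path, and its constructor arguments are assumptions based
# on the attributes referenced above (`img_height`, `this_scale`, `aspect_ratios`, ...)
# and may differ from the actual code; the output shape is what the comments promise.
from keras.models import Model
from keras.layers import Input, Conv2D
from keras_layer_AnchorBoxes import AnchorBoxes  # assumed module/class name

img_height, img_width = 300, 300
n_boxes = 4  # e.g. aspect ratios [0.5, 1, 2] with two boxes for ar == 1

images = Input(shape=(img_height, img_width, 3))
# Localization predictor feature map: 4 offsets per box per cell.
loc = Conv2D(n_boxes * 4, (3, 3), padding='same')(images)
# The anchor layer consumes the predictor output only to read its spatial shape.
anchors = AnchorBoxes(img_height, img_width,
                      this_scale=0.2, next_scale=0.34,
                      aspect_ratios=[0.5, 1.0, 2.0],
                      two_boxes_for_ar1=True)(loc)

model = Model(inputs=images, outputs=anchors)
print(model.output_shape)  # expected: (None, 300, 300, 4, 8) -> 4 box coords + 4 variances
# ---------------------------------------------------------------------------------------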