def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
  """Builds one anchor grid per feature map.

  Each feature map shape is paired with the matching entries of
  self._base_sizes, self._aspect_ratios, self._anchor_strides and
  self._anchor_offsets; iteration stops at the shortest of those sequences.

  Args:
    feature_map_shape_list: sequence of feature map shapes. Elements 0 and 1
      of every shape are handed to tile_anchors as its first two arguments.
    im_height: image height used when normalizing the anchors.
    im_width: image width used when normalizing the anchors.

  Returns:
    A list with one anchor grid per processed feature map. Every grid
    carries a 'feature_map_index' field filled with zeros.

  Raises:
    ValueError: if normalized coordinates were requested at construction time
      and im_height or im_width equals 1.
  """

  def _as_float32(value):
    # All per-level settings reach tile_anchors as float32 tensors.
    return tf.cast(tf.convert_to_tensor(value), dtype=tf.float32)

  per_level_settings = zip(feature_map_shape_list, self._base_sizes,
                           self._aspect_ratios, self._anchor_strides,
                           self._anchor_offsets)
  grids = []
  for shape, sizes, ratios, stride, offset in per_level_settings:
    grid = grid_anchor_generator.tile_anchors(
        shape[0],
        shape[1],
        _as_float32(sizes),
        _as_float32(ratios),
        tf.constant([1.0, 1.0]),
        _as_float32(stride),
        _as_float32(offset))

    # Prefer the statically known box count; fall back to the dynamic one.
    box_count = grid.num_boxes_static()
    if box_count is None:
      box_count = grid.num_boxes()
    grid.add_field('feature_map_index', tf.zeros([box_count]))

    if self._normalize_coordinates:
      if im_height == 1 or im_width == 1:
        raise ValueError(
            'Normalized coordinates were requested upon construction of the '
            'FlexibleGridAnchorGenerator, but a subsequent call to '
            'generate did not supply dimension information.')
      grid = box_list_ops.to_normalized_coordinates(
          grid, im_height, im_width, check_range=False)
    grids.append(grid)
  return grids
def graph_fn():
  """Normalizes two boxes against the spatial shape of a 100x100 image.

  Returns:
    The box tensor of the BoxList produced by to_normalized_coordinates.
  """
  box_corners = tf.constant([[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]],
                            tf.float32)
  image = tf.ones((128, 100, 100, 3))
  # Dimensions 1 and 2 of the image tensor are used as height and width.
  image_height = tf.shape(image)[1]
  image_width = tf.shape(image)[2]
  normalized = box_list_ops.to_normalized_coordinates(
      box_list.BoxList(box_corners), image_height, image_width)
  return normalized.get()
def _normalize_boxlist(args):
  """Scales, normalizes and clips a box list.

  `stride` is not a parameter; it is read from the enclosing scope.

  Args:
    args: a 3-element sequence (boxes, height, width).

  Returns:
    The result of clipping the scaled and normalized boxes to the window
    [0, 0, 1, 1] without filtering non-overlapping boxes.
  """
  boxlist, image_height, image_width = args
  scaled = box_list_ops.scale(boxlist, stride, stride)
  normalized = box_list_ops.to_normalized_coordinates(
      scaled, image_height, image_width)
  unit_window = [0., 0., 1., 1.]
  return box_list_ops.clip_to_window(
      normalized, unit_window, filter_nonoverlapping=False)
def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
  """Creates the anchor boxes for every requested feature map.

  Args:
    feature_map_shape_list: list of pairs of convnet layer resolutions in the
      format [(height_0, width_0), (height_1, width_1), ...]. For example,
      [(8, 8), (7, 7)] asks for anchors for an 8x8 layer followed by a 7x7
      layer.
    im_height: height of the image the grid is generated for. Left at 1,
      anchors can only be produced in absolute coordinates.
    im_width: width of the image the grid is generated for. Left at 1,
      anchors can only be produced in absolute coordinates.

  Returns:
    boxes_list: a list of BoxLists, one per processed feature map shape, each
      with a zero-filled 'feature_map_index' field.

  Raises:
    ValueError: if normalized coordinates were requested at construction time
      and im_height or im_width equals 1.
  """

  def _float_tensor(value):
    return tf.cast(tf.convert_to_tensor(value=value), dtype=tf.float32)

  boxes_list = []
  for level_config in zip(feature_map_shape_list, self._base_sizes,
                          self._aspect_ratios, self._anchor_strides,
                          self._anchor_offsets):
    (grid_shape, level_sizes, level_ratios, level_stride,
     level_offset) = level_config
    level_anchors = grid_anchor_generator.tile_anchors(
        grid_shape[0],
        grid_shape[1],
        _float_tensor(level_sizes),
        _float_tensor(level_ratios),
        tf.constant([1.0, 1.0]),
        _float_tensor(level_stride),
        _float_tensor(level_offset))

    # Use the static box count when it is known, the dynamic one otherwise.
    anchor_count = level_anchors.num_boxes_static()
    if anchor_count is None:
      anchor_count = level_anchors.num_boxes()
    level_anchors.add_field('feature_map_index', tf.zeros([anchor_count]))

    if self._normalize_coordinates:
      if im_height == 1 or im_width == 1:
        raise ValueError(
            'Normalized coordinates were requested upon construction of the '
            'FlexibleGridAnchorGenerator, but a subsequent call to '
            'generate did not supply dimension information.')
      level_anchors = box_list_ops.to_normalized_coordinates(
          level_anchors, im_height, im_width, check_range=False)
    boxes_list.append(level_anchors)
  return boxes_list
def graph_fn():
  """Converts boxes to absolute coordinates and back to normalized ones.

  `coordinates` is read from the enclosing scope.

  Returns:
    The box tensor after the absolute -> normalized round trip.
  """
  image = tf.ones((128, 202, 202, 3))
  boxes = box_list.BoxList(tf.constant(coordinates, tf.float32))
  # Dimensions 1 and 2 of the image tensor are used as height and width.
  absolute = box_list_ops.to_absolute_coordinates(
      boxes, tf.shape(image)[1], tf.shape(image)[2])
  round_tripped = box_list_ops.to_normalized_coordinates(
      absolute, tf.shape(image)[1], tf.shape(image)[2])
  return round_tripped.get()
def _generate(self, feature_map_shape_list, im_height, im_width):
  """Produces the multiscale anchor grids for the given feature maps.

  The image shape must be static: im_height and im_width have to be Python
  integers rather than tensors.

  Args:
    feature_map_shape_list: list of pairs of convnet layer resolutions in the
      format [(height_0, width_0), (height_1, width_1), ...]. For example,
      [(8, 8), (7, 7)] asks for anchors for an 8x8 layer followed by a 7x7
      layer.
    im_height: the height of the image to generate the grid for.
    im_width: the width of the image to generate the grid for.

  Returns:
    boxes_list: a list of BoxLists, one per feature map shape paired with an
      entry of self._anchor_grid_info.

  Raises:
    ValueError: if im_height or im_width is not an integer.
  """
  static_shape = isinstance(im_height, int) and isinstance(im_width, int)
  if not static_shape:
    raise ValueError(
        'MultiscaleGridAnchorGenerator currently requires '
        'input image shape to be statically defined.')

  boxes_list = []
  for shape, level_info in zip(feature_map_shape_list,
                               self._anchor_grid_info):
    # TODO(rathodv) check the feature_map_shape_list is consistent with
    # self._anchor_grid_info
    level = level_info['level']
    stride = 2**level
    scales, aspect_ratios, base_anchor_size, anchor_stride = level_info[
        'info']
    grid_height, grid_width = shape[0], shape[1]

    # An axis gets a half-stride offset only when the image size along that
    # axis is an exact multiple of 2**level; otherwise the offset stays 0.
    half_stride = stride / 2.0
    anchor_offset = [
        half_stride if im_height % 2.0**level == 0 else 0,
        half_stride if im_width % 2.0**level == 0 else 0,
    ]

    generator = grid_anchor_generator.GridAnchorGenerator(
        scales,
        aspect_ratios,
        base_anchor_size=base_anchor_size,
        anchor_stride=anchor_stride,
        anchor_offset=anchor_offset)
    (level_anchors,) = generator.generate(
        feature_map_shape_list=[(grid_height, grid_width)])
    if self._normalize_coordinates:
      level_anchors = box_list_ops.to_normalized_coordinates(
          level_anchors, im_height, im_width, check_range=False)
    boxes_list.append(level_anchors)
  return boxes_list
def test_to_normalized_coordinates_already_normalized(self):
  """Running the op on boxes with values in [0, 1] must fail an assertion."""
  unit_range_boxes = tf.constant([[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]],
                                 tf.float32)
  image = tf.ones((128, 100, 100, 3))
  result = box_list_ops.to_normalized_coordinates(
      box_list.BoxList(unit_range_boxes),
      tf.shape(image)[1],
      tf.shape(image)[2])
  with self.test_session() as sess, self.assertRaisesOpError(
      'assertion failed'):
    sess.run(result.get())
def test_to_normalized_coordinates_already_normalized(self):
  """Checks that normalizing already-normalized boxes raises an op error."""
  corners = tf.constant([[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]], tf.float32)
  image = tf.ones((128, 100, 100, 3))
  boxes = box_list.BoxList(corners)
  # Dimensions 1 and 2 of the image tensor are used as height and width.
  height = tf.shape(image)[1]
  width = tf.shape(image)[2]
  renormalized = box_list_ops.to_normalized_coordinates(boxes, height, width)
  with self.test_session() as sess:
    with self.assertRaisesOpError('assertion failed'):
      sess.run(renormalized.get())
def _postprocess_sample(self, boxes_output_stride, instance_embedding,
                        pixel_embedding):
  """Computes per-box mask probabilities for a single sample.

  Args:
    boxes_output_stride: A [num_instances, 4] float tensor containing
      bounding boxes in the absolute output space.
    instance_embedding: A [output_height, output_width, embedding_size]
      float tensor containing instance embeddings.
    pixel_embedding: A [batch_size, output_height, output_width,
      pixel_embedding_size] float tensor containing the per-pixel embedding.

  Returns:
    masks: A float tensor of size [num_instances, mask_height, mask_width]
      holding the sigmoid of the per-box mask logits. If
      predict_full_resolution_masks is set, the masks are resized and cropped
      to postprocess_crop_size. Otherwise, mask_height=mask_width=mask_size
  """
  # Boxes are normalized by the spatial size of the instance embedding.
  embedding_shape = tf.shape(instance_embedding)
  output_height = tf.cast(embedding_shape[0], tf.float32)
  output_width = tf.cast(embedding_shape[1], tf.float32)
  normalized_boxlist = box_list_ops.to_normalized_coordinates(
      box_list.BoxList(boxes_output_stride),
      output_height,
      output_width,
      check_range=False)
  boxes = normalized_boxlist.get()

  mask_input = self._get_mask_head_input(boxes, pixel_embedding)
  instance_embeddings = self._get_instance_embeddings(
      boxes, instance_embedding)
  mask_logits = self._mask_net(
      instance_embeddings,
      mask_input,
      training=tf.keras.backend.learning_phase())

  # TODO(vighneshb) Explore sweeping mask thresholds.

  params = self._deepmac_params
  if params.predict_full_resolution_masks:
    # Upsample the logits by the stride, then crop each mask to its box.
    logits_shape = tf.shape(mask_logits)
    full_height = logits_shape[1] * self._stride
    full_width = logits_shape[2] * self._stride
    mask_logits = resize_instance_masks(mask_logits,
                                        (full_height, full_width))
    mask_logits = crop_masks_within_boxes(
        mask_logits, boxes, params.postprocess_crop_size)

  return tf.nn.sigmoid(mask_logits)
def _generate(self, feature_map_shape_list, im_height, im_width):
  """Builds a list of anchor BoxLists, one per pyramid level.

  Only statically defined image shapes are supported, i.e. im_height and
  im_width must be integers rather than tensors.

  Args:
    feature_map_shape_list: list of pairs of convnet layer resolutions in the
      format [(height_0, width_0), (height_1, width_1), ...]. For example,
      [(8, 8), (7, 7)] asks for anchors for an 8x8 layer followed by a 7x7
      layer.
    im_height: the height of the image to generate the grid for.
    im_width: the width of the image to generate the grid for.

  Returns:
    boxes_list: a list of BoxLists each holding anchor boxes corresponding to
      the input feature map shapes.

  Raises:
    ValueError: if im_height or im_width is not an integer.
  """
  for dimension in (im_height, im_width):
    if not isinstance(dimension, int):
      raise ValueError('MultiscaleGridAnchorGenerator currently requires '
                       'input image shape to be statically defined.')

  def _axis_offset(image_size, level, stride):
    # Half a stride when the image size is a multiple of 2**level, else 0.
    if image_size % 2.0**level == 0:
      return stride / 2.0
    return 0

  anchor_grids = []
  level_specs = zip(feature_map_shape_list, self._anchor_grid_info)
  for feat_shape, grid_info in level_specs:
    # TODO(rathodv) check the feature_map_shape_list is consistent with
    # self._anchor_grid_info
    level = grid_info['level']
    stride = 2**level
    scales, aspect_ratios, base_anchor_size, anchor_stride = grid_info['info']
    single_map_shape = (feat_shape[0], feat_shape[1])
    anchor_offset = [
        _axis_offset(im_height, level, stride),
        _axis_offset(im_width, level, stride),
    ]
    level_generator = grid_anchor_generator.GridAnchorGenerator(
        scales,
        aspect_ratios,
        base_anchor_size=base_anchor_size,
        anchor_stride=anchor_stride,
        anchor_offset=anchor_offset)
    (grid,) = level_generator.generate(
        feature_map_shape_list=[single_map_shape])
    if self._normalize_coordinates:
      grid = box_list_ops.to_normalized_coordinates(
          grid, im_height, im_width, check_range=False)
    anchor_grids.append(grid)
  return anchor_grids
def test_to_normalized_coordinates(self):
  """Boxes on a 100x100 image normalize to the expected [0, 1] values."""
  absolute_boxes = tf.constant([[0, 0, 100, 100], [25, 25, 75, 75]],
                               tf.float32)
  image = tf.ones((128, 100, 100, 3))
  # Dimensions 1 and 2 of the image tensor are used as height and width.
  normalized = box_list_ops.to_normalized_coordinates(
      box_list.BoxList(absolute_boxes),
      tf.shape(image)[1],
      tf.shape(image)[2])
  expected = [[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]]

  with self.test_session() as sess:
    actual = sess.run(normalized.get())
    self.assertAllClose(actual, expected)
def test_to_normalized_coordinates(self):
  """Verifies normalization of absolute boxes against a 100x100 image."""
  expected_boxes = [[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]]
  pixel_boxes = tf.constant([[0, 0, 100, 100], [25, 25, 75, 75]], tf.float32)
  img = tf.ones((128, 100, 100, 3))
  boxes = box_list.BoxList(pixel_boxes)
  height = tf.shape(img)[1]
  width = tf.shape(img)[2]
  result_boxlist = box_list_ops.to_normalized_coordinates(
      boxes, height, width)
  with self.test_session() as sess:
    result = sess.run(result_boxlist.get())
    self.assertAllClose(result, expected_boxes)
def test_convert_to_absolute_and_back(self):
  """Normalized -> absolute -> normalized must reproduce the input boxes."""
  # 100 random boxes; np.sort orders the four values of every row ascending.
  original = np.sort(np.random.uniform(size=(100, 4)))
  # The last row is pinned to the full-image box.
  original[99, :] = [0, 0, 1, 1]

  image = tf.ones((128, 202, 202, 3))
  boxes = box_list.BoxList(tf.constant(original, tf.float32))
  boxes = box_list_ops.to_absolute_coordinates(
      boxes, tf.shape(image)[1], tf.shape(image)[2])
  boxes = box_list_ops.to_normalized_coordinates(
      boxes, tf.shape(image)[1], tf.shape(image)[2])

  with self.test_session() as sess:
    round_tripped = sess.run(boxes.get())
    self.assertAllClose(round_tripped, original)
def test_convert_to_absolute_and_back(self):
  """Checks that converting to absolute coordinates and back is lossless."""
  random_values = np.random.uniform(size=(100, 4))
  # Ascending order within each row, then force the last row to [0, 0, 1, 1].
  reference = np.sort(random_values)
  reference[99, :] = [0, 0, 1, 1]

  img = tf.ones((128, 202, 202, 3))
  normalized_in = box_list.BoxList(tf.constant(reference, tf.float32))
  absolute = box_list_ops.to_absolute_coordinates(
      normalized_in, tf.shape(img)[1], tf.shape(img)[2])
  normalized_out = box_list_ops.to_normalized_coordinates(
      absolute, tf.shape(img)[1], tf.shape(img)[2])

  with self.test_session() as sess:
    result = sess.run(normalized_out.get())
    self.assertAllClose(result, reference)
def get_feature_map_anchor_boxes(feature_map_shape_list, **anchor_kwargs):
  """
  Generate anchors for every feature map and return them normalized.

  Anchors come from a RegionsGridAnchorGenerator built from
  ``anchor_kwargs`` and are normalized against a fixed 1280 x 1920
  (height x width) image size without range checking. Every box list the
  generator returns is normalized and evaluated, not only the first one.

  :param feature_map_shape_list: list of tuples containing feature map
      resolutions; each tuple becomes a dictionary key, so it must be hashable
  :param anchor_kwargs: keyword arguments forwarded unchanged to
      RegionsGridAnchorGenerator
  :returns: dict with feature map shape tuple as key and the evaluated array
      of [ymin, xmin, ymax, xmax] box co-ordinates as value
  """
  image_height, image_width = 1280, 1920

  anchor_generator = RegionsGridAnchorGenerator(**anchor_kwargs)
  anchor_box_lists = anchor_generator.generate(feature_map_shape_list)
  normalized_box_lists = [
      box_list_ops.to_normalized_coordinates(
          anchors, image_height, image_width, check_range=False)
      for anchors in anchor_box_lists
  ]

  # Evaluate all box tensors in a single run call.
  with tf.Session() as sess:
    box_arrays = sess.run(
        [anchors.get() for anchors in normalized_box_lists])

  # zip pairs shapes with results positionally and stops at the shorter one.
  return dict(zip(feature_map_shape_list, box_arrays))
def _crop_and_resize_masks(np_masks, np_proposals, np_inds, height, width):
  """Crops one groundtruth mask per proposal and resizes it with OpenCV.

  Args:
    np_masks: numpy array indexed as [groundtruth_index, y, x, ...].
    np_proposals: numpy array whose rows hold [ymin, xmin, ymax, xmax] in
      pixel coordinates of the masks; the max corner is inclusive.
    np_inds: for every proposal, the index of the groundtruth mask to crop.
    height: output mask height.
    width: output mask width.

  Returns:
    A float32 numpy array of shape [len(np_inds), height, width]. Rows stay
    all-zero when there is no groundtruth mask at all or when the proposal
    does not overlap the mask area.
  """
  height, width = int(height), int(width)
  mask_crops = np.zeros((len(np_inds), height, width), dtype=np.float32)
  if np_masks.shape[0] == 0:
    # No groundtruth instance to crop from.
    return mask_crops

  mask_rows, mask_cols = np_masks.shape[1], np_masks.shape[2]
  for i, gt_index in enumerate(np_inds):
    roi = np_proposals[i]
    # Clip to the mask extent: a negative start would otherwise be read as a
    # from-the-end index and silently select the wrong region.
    y_start = max(int(roi[0]), 0)
    x_start = max(int(roi[1]), 0)
    y_stop = min(int(roi[2]) + 1, mask_rows)
    x_stop = min(int(roi[3]) + 1, mask_cols)
    if y_stop <= y_start or x_stop <= x_start:
      # Empty crop; cv2.resize cannot handle it, keep the zero target.
      continue
    crop = np_masks[gt_index, y_start:y_stop, x_start:x_stop]
    mask_crops[i, :, :] = cv2.resize(
        crop, (width, height), interpolation=cv2.INTER_NEAREST)
  return mask_crops


def batch_assign_mask_targets(image_shape, groundtruth_masks_list,
                              proposal_boxlists, match_list, mask_height,
                              mask_width):
  """Batched assignment of mask targets.

  Args:
    image_shape: a 1-D tensor of shape [4] representing the input image shape.
      Not used by this implementation; kept so existing callers keep working.
    groundtruth_masks_list: a list of 3-D tf.bool tensors of shape
      [num_boxes, height_in, width_in] containing instance masks with values
      in {0, 1}.
    proposal_boxlists: a list of BoxLists, each containing a tensor of shape
      [self.max_num_proposals, 4] which represents decoded proposal bounding
      boxes. The boxes are used directly as pixel indices into the masks.
    match_list: a list of matcher.Match objects encoding the match between
      anchors and groundtruth boxes for each image of the batch, with rows of
      the Match objects corresponding to groundtruth boxes and columns
      corresponding to anchors.
    mask_height: a 0-D integer tensor representing the predicted mask height.
    mask_width: a 0-D integer tensor representing the predicted mask width.

  Returns:
    batch_mask_targets: a tensor of shape [batch_size, num_anchors,
      mask_height * mask_width].
    batch_mask_weights: a tensor of shape [batch_size, num_anchors].
  """
  mask_targets_list = []
  mask_weights_list = []
  for groundtruth_masks, match, proposal_boxlist in zip(
      groundtruth_masks_list, match_list, proposal_boxlists):
    groundtruth_masks = tf.cast(
        tf.expand_dims(groundtruth_masks, axis=3), tf.float32)
    # We crop some arbitrary mask for unmatched examples, but
    # disable their influence in the loss with batch_reg_weights.
    gt_inds_per_anchor = tf.maximum(match.match_results, 0)

    # Cropping and resizing run in Python (NumPy + OpenCV) via py_func.
    mask_crops = tf.py_func(
        _crop_and_resize_masks,
        [
            groundtruth_masks,
            proposal_boxlist.get(),
            gt_inds_per_anchor,
            mask_height,
            mask_width,
        ],
        [tf.float32])
    mask_crops = tf.convert_to_tensor(
        tf.cast(mask_crops, tf.float32), name='mask_crops')

    mask_targets = tf.reshape(mask_crops, [-1, mask_height * mask_width])
    mask_weights = tf.cast(match.matched_column_indicator(), tf.float32)
    mask_targets_list.append(mask_targets)
    mask_weights_list.append(mask_weights)

  batch_mask_targets = tf.stack(mask_targets_list)
  batch_mask_weights = tf.stack(mask_weights_list)
  return batch_mask_targets, batch_mask_weights
def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
  """Builds the multiscale anchor grids, one BoxList per pyramid level.

  im_height and im_width may both be Python integers or both be tensors.

  Args:
    feature_map_shape_list: list of pairs of convnet layer resolutions in the
      format [(height_0, width_0), (height_1, width_1), ...]. For example,
      [(8, 8), (7, 7)] asks for anchors for an 8x8 layer followed by a 7x7
      layer.
    im_height: the height of the image to generate the grid for. Left at 1,
      anchors can only be generated in absolute coordinates.
    im_width: the width of the image to generate the grid for. Left at 1,
      anchors can only be generated in absolute coordinates.

  Returns:
    boxes_list: a list of BoxLists each holding anchor boxes corresponding to
      the input feature map shapes.

  Raises:
    ValueError: if normalized coordinates were requested at construction time
      and im_height or im_width equals 1.
  """
  both_ints = isinstance(im_height, int) and isinstance(im_width, int)
  both_tensors = tf.is_tensor(im_height) and tf.is_tensor(im_width)

  boxes_list = []
  for shape, level_info in zip(feature_map_shape_list,
                               self._anchor_grid_info):
    # TODO(rathodv) check the feature_map_shape_list is consistent with
    # self._anchor_grid_info
    level = level_info['level']
    stride = 2**level
    scales, aspect_ratios, base_anchor_size, anchor_stride = level_info['info']
    grid_height, grid_width = shape[0], shape[1]

    half_stride = stride / 2.0
    anchor_offset = [0, 0]
    if both_tensors:
      # Tensor-valued sizes always get the half-stride offset on both axes.
      anchor_offset = [half_stride, half_stride]
    elif both_ints:
      # Static sizes: offset an axis when its size is a multiple of 2**level
      # or was left at the placeholder value 1.
      if im_height % 2.0**level == 0 or im_height == 1:
        anchor_offset[0] = half_stride
      if im_width % 2.0**level == 0 or im_width == 1:
        anchor_offset[1] = half_stride

    generator = grid_anchor_generator.GridAnchorGenerator(
        scales,
        aspect_ratios,
        base_anchor_size=base_anchor_size,
        anchor_stride=anchor_stride,
        anchor_offset=anchor_offset)
    (level_anchors,) = generator.generate(
        feature_map_shape_list=[(grid_height, grid_width)])

    if self._normalize_coordinates:
      if im_height == 1 or im_width == 1:
        raise ValueError(
            'Normalized coordinates were requested upon construction of the '
            'MultiscaleGridAnchorGenerator, but a subsequent call to '
            'generate did not supply dimension information.')
      level_anchors = box_list_ops.to_normalized_coordinates(
          level_anchors, im_height, im_width, check_range=False)
    boxes_list.append(level_anchors)
  return boxes_list