def calculate_actual_outputs(prior_boxes, gt_boxes, gt_labels, hyper_params):
    """Calculate ssd actual output values.

    Batch operations supported.

    inputs:
        prior_boxes = (total_prior_boxes, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        gt_boxes = (batch_size, gt_box_size, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        gt_labels = (batch_size, gt_box_size)
        hyper_params = dictionary, the keys "total_labels",
            "iou_threshold" and "variances" are read here

    outputs:
        bbox_deltas = (batch_size, total_bboxes, [delta_y, delta_x, delta_h, delta_w])
        bbox_labels = (batch_size, total_bboxes, [0,0,...,0])
    """
    total_labels = hyper_params["total_labels"]
    iou_threshold = hyper_params["iou_threshold"]
    variances = hyper_params["variances"]
    # Calculate iou values between each prior box and the ground truth boxes
    iou_map = bbox_utils.generate_iou_map(prior_boxes, gt_boxes)
    # For every prior box, index of the ground truth box with the highest iou
    # (axis 2 is the axis later used to index gt_boxes / gt_labels)
    max_indices_each_gt_box = tf.argmax(iou_map, axis=2, output_type=tf.int32)
    # ... and the value of that highest iou
    merged_iou_map = tf.reduce_max(iou_map, axis=2)
    # A prior box is positive only when its best iou is strictly above the threshold
    pos_cond = tf.greater(merged_iou_map, iou_threshold)
    # Pick the best matching gt box for every prior box, then zero out the
    # boxes of the non-positive priors before computing deltas
    gt_boxes_map = tf.gather(gt_boxes, max_indices_each_gt_box, batch_dims=1)
    expanded_gt_boxes = tf.where(
        tf.expand_dims(pos_cond, -1),
        gt_boxes_map,
        tf.zeros_like(gt_boxes_map),
    )
    bbox_deltas = bbox_utils.get_deltas_from_bboxes(prior_boxes, expanded_gt_boxes) / variances
    # Same selection for labels; non-positive priors get label 0
    # (presumably the background class - confirm against the label map)
    gt_labels_map = tf.gather(gt_labels, max_indices_each_gt_box, batch_dims=1)
    expanded_gt_labels = tf.where(pos_cond, gt_labels_map, tf.zeros_like(gt_labels_map))
    bbox_labels = tf.one_hot(expanded_gt_labels, total_labels)
    return bbox_deltas, bbox_labels
def call(self, inputs):
    """Build per-class regression targets and one-hot labels for roi boxes.

    inputs:
        inputs[0] = roi_bboxes, indexed as (batch_size, total_bboxes, ...)
        inputs[1] = gt_boxes
        inputs[2] = gt_labels
        self.hyper_params supplies "total_labels", "total_pos_bboxes",
            "total_neg_bboxes" and "variances"

    outputs:
        roi_bbox_deltas = (batch_size, total_bboxes * total_labels, 4)
        roi_bbox_labels = one hot encoding of the selected labels with
            depth total_labels
        both are wrapped in tf.stop_gradient
    """
    roi_bboxes, gt_boxes, gt_labels = inputs[0], inputs[1], inputs[2]
    params = self.hyper_params
    total_labels = params["total_labels"]
    total_pos_bboxes = params["total_pos_bboxes"]
    total_neg_bboxes = params["total_neg_bboxes"]
    variances = params["variances"]
    roi_shape = tf.shape(roi_bboxes)
    batch_size, total_bboxes = roi_shape[0], roi_shape[1]

    # IoU of every roi box against every ground truth box
    iou_map = bbox_utils.generate_iou_map(roi_bboxes, gt_boxes)
    # Best matching gt box per roi: its index and its iou value
    best_gt_index = tf.argmax(iou_map, axis=2, output_type=tf.int32)
    best_iou = tf.reduce_max(iou_map, axis=2)

    # Positives: best iou above 0.5, then sampled via the train_utils helper
    pos_mask = train_utils.randomly_select_xyz_mask(
        best_iou > 0.5,
        tf.constant([total_pos_bboxes], dtype=tf.int32),
    )
    # Negatives: best iou strictly between 0.1 and 0.5, sampled the same way
    neg_candidates = tf.logical_and(best_iou < 0.5, best_iou > 0.1)
    neg_mask = train_utils.randomly_select_xyz_mask(
        neg_candidates,
        tf.constant([total_neg_bboxes], dtype=tf.int32),
    )

    # Matched gt box for positives, zeros everywhere else
    matched_boxes = tf.gather(gt_boxes, best_gt_index, batch_dims=1)
    target_boxes = tf.where(
        pos_mask[..., tf.newaxis],
        matched_boxes,
        tf.zeros_like(matched_boxes),
    )

    # Label encoding: positives keep the gt label, negatives become
    # -1 + 1 = 0, and unselected rois stay at -1 so that tf.one_hot
    # emits an all-zero row for them.
    matched_labels = tf.gather(gt_labels, best_gt_index, batch_dims=1)
    pos_part = tf.where(pos_mask, matched_labels, tf.constant(-1, dtype=tf.int32))
    neg_part = tf.cast(neg_mask, dtype=tf.int32)
    target_labels = pos_part + neg_part

    deltas = bbox_utils.get_deltas_from_bboxes(roi_bboxes, target_boxes) / variances

    roi_bbox_labels = tf.one_hot(target_labels, total_labels)
    # Broadcast the one-hot row against the 4 delta values so that only the
    # slot of the assigned class carries the deltas; all other slots are 0.
    per_class_deltas = (
        tf.expand_dims(roi_bbox_labels, -1) * tf.expand_dims(deltas, -2)
    )
    roi_bbox_deltas = tf.reshape(
        per_class_deltas,
        (batch_size, total_bboxes * total_labels, 4),
    )
    return tf.stop_gradient(roi_bbox_deltas), tf.stop_gradient(roi_bbox_labels)
def calculate_rpn_actual_outputs(anchors, gt_boxes, gt_labels, hyper_params):
    """Generating one step data for training or inference.

    Batch operations supported.

    inputs:
        anchors = (total_anchors, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        gt_boxes = (batch_size, gt_box_size, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        gt_labels = (batch_size, gt_box_size)
        hyper_params = dictionary, the keys "feature_map_shape",
            "anchor_count", "total_pos_bboxes", "total_neg_bboxes" and
            "variances" are read here

    outputs:
        bbox_deltas = (batch_size, total_anchors, [delta_y, delta_x, delta_h, delta_w])
        bbox_labels = (batch_size, feature_map_shape, feature_map_shape, anchor_count)
            1.0 for positive, 0.0 for negative and -1.0 for unused anchors
    """
    batch_size = tf.shape(gt_boxes)[0]
    feature_map_shape = hyper_params["feature_map_shape"]
    anchor_count = hyper_params["anchor_count"]
    total_pos_bboxes = hyper_params["total_pos_bboxes"]
    total_neg_bboxes = hyper_params["total_neg_bboxes"]
    variances = hyper_params["variances"]

    # IoU of every anchor against every ground truth box
    iou_map = bbox_utils.generate_iou_map(anchors, gt_boxes)
    # Per anchor: index of its best gt box; per gt box: index of its best anchor
    best_gt_per_anchor = tf.argmax(iou_map, axis=2, output_type=tf.int32)
    best_anchor_per_gt = tf.argmax(iou_map, axis=1, output_type=tf.int32)
    # Per anchor: the iou value of its best gt box
    best_iou = tf.reduce_max(iou_map, axis=2)

    # Rule 1: anchors whose best iou exceeds 0.7 are positive
    pos_mask = best_iou > 0.7

    # Rule 2: the best anchor of every gt box whose label is not -1 is positive
    # as well (presumably -1 marks padded gt entries - confirm with the
    # data pipeline).
    is_valid_gt = tf.not_equal(gt_labels, -1)
    valid_gt_coords = tf.cast(tf.where(is_valid_gt), tf.int32)
    valid_best_anchor = tf.boolean_mask(best_anchor_per_gt, is_valid_gt)
    # Column 0 of the tf.where result is the batch index of each valid gt box
    batch_anchor_pairs = tf.stack([valid_gt_coords[:, 0], valid_best_anchor], axis=1)
    forced_pos_mask = tf.scatter_nd(
        batch_anchor_pairs,
        tf.fill((tf.shape(valid_gt_coords)[0],), True),
        tf.shape(pos_mask),
    )
    pos_mask = tf.logical_or(pos_mask, forced_pos_mask)
    pos_mask = randomly_select_xyz_mask(
        pos_mask,
        tf.constant([total_pos_bboxes], dtype=tf.int32),
    )

    # Negatives fill whatever part of the total budget the positives left over
    pos_count = tf.reduce_sum(tf.cast(pos_mask, tf.int32), axis=-1)
    neg_count = (total_pos_bboxes + total_neg_bboxes) - pos_count
    neg_candidates = tf.logical_and(best_iou < 0.3, tf.logical_not(pos_mask))
    neg_mask = randomly_select_xyz_mask(neg_candidates, neg_count)

    # Positive -> 1.0, negative -> -1.0 + 1.0 = 0.0, everything else -> -1.0
    pos_labels = tf.where(
        pos_mask,
        tf.ones_like(pos_mask, dtype=tf.float32),
        tf.constant(-1.0, dtype=tf.float32),
    )
    neg_labels = tf.cast(neg_mask, dtype=tf.float32)
    bbox_labels = pos_labels + neg_labels

    # Best gt box per anchor; non-positive anchors are replaced with zeros
    matched_boxes = tf.gather(gt_boxes, best_gt_per_anchor, batch_dims=1)
    target_boxes = tf.where(
        tf.expand_dims(pos_mask, -1),
        matched_boxes,
        tf.zeros_like(matched_boxes),
    )
    # Calculate delta values between anchors and ground truth bboxes
    bbox_deltas = bbox_utils.get_deltas_from_bboxes(anchors, target_boxes) / variances

    # Only the labels are laid out on the feature map grid; the deltas are
    # returned flat as (batch_size, total_anchors, 4).
    bbox_labels = tf.reshape(
        bbox_labels,
        (batch_size, feature_map_shape, feature_map_shape, anchor_count),
    )
    return bbox_deltas, bbox_labels