def call(self, inputs):
    """Build RoI boxes from RPN outputs and pair them with ground truth indices.

    inputs:
        inputs[0] = rpn_bbox_deltas, reshaped here to (batch_size, total_anchors, 4)
        inputs[1] = rpn_labels, reshaped here to (batch_size, total_anchors, 1)
        inputs[2] = anchors, its first two dims are read as (batch_size, total_anchors)
        inputs[3] = gt_boxes, passed through to helpers.get_selected_indices

    outputs:
        roi_bboxes = boxes gathered at the positive indices followed by
            total_neg_bboxes all-zero boxes, concatenated along axis 1
        gt_box_indices = third value returned by helpers.get_selected_indices
        Both outputs are wrapped in tf.stop_gradient.
    """
    deltas = inputs[0]
    scores = inputs[1]
    anchors = inputs[2]
    gt_boxes = inputs[3]

    total_pos_bboxes = self.hyper_params["total_pos_bboxes"]
    total_neg_bboxes = self.hyper_params["total_neg_bboxes"]
    # Computed as in the original implementation; not read again in this method.
    total_bboxes = total_pos_bboxes + total_neg_bboxes

    anchor_dims = tf.shape(anchors)
    batch_size = anchor_dims[0]
    total_anchors = anchor_dims[1]
    deltas = tf.reshape(deltas, (batch_size, total_anchors, 4))
    scores = tf.reshape(scores, (batch_size, total_anchors, 1))

    # Decode the deltas against the anchors, then add a singleton axis before the coordinates for the NMS helper.
    decoded_bboxes = tf.reshape(
        helpers.get_bboxes_from_deltas(anchors, deltas),
        (batch_size, total_anchors, 1, 4),
    )
    nms_topn = self.hyper_params["nms_topn"]
    nms_bboxes, _, _, _ = helpers.non_max_suppression(
        decoded_bboxes,
        scores,
        max_output_size_per_class=nms_topn,
        max_total_size=nms_topn,
    )

    # Only the positive indices and the ground truth indices are used below.
    pos_indices, _neg_indices, gt_box_indices = helpers.get_selected_indices(
        nms_bboxes, gt_boxes, total_pos_bboxes, total_neg_bboxes)

    # Negative RoIs are emitted as zero boxes rather than gathered from the NMS result.
    positive_rois = tf.gather(nms_bboxes, pos_indices, batch_dims=1)
    negative_rois = tf.zeros((batch_size, total_neg_bboxes, 4), tf.float32)
    roi_bboxes = tf.concat([positive_rois, negative_rois], axis=1)
    return tf.stop_gradient(roi_bboxes), tf.stop_gradient(gt_box_indices)
def get_valid_predictions(roi_bboxes, frcnn_delta_pred, frcnn_label_pred, total_labels):
    """Generate valid detections from Faster R-CNN predictions by removing background predictions.

    Batch size should be 1 for this method.

    inputs:
        roi_bboxes = (batch_size, total_pos_bboxes + total_neg_bboxes, [y1, x1, y2, x2])
        frcnn_delta_pred = (batch_size, total_pos_bboxes + total_neg_bboxes, total_labels * [delta_y, delta_x, delta_h, delta_w])
        frcnn_label_pred = (batch_size, total_pos_bboxes + total_neg_bboxes, total_labels)
        total_labels = number of labels; a RoI whose argmax label equals
            total_labels - 1 is treated as background and dropped

    outputs:
        valid_pred_bboxes = (batch_size, total_valid_bboxes, total_labels, [y1, x1, y2, x2])
        valid_labels = (batch_size, total_valid_bboxes, total_labels)
    """
    best_label_per_roi = tf.argmax(frcnn_label_pred, axis=2, output_type=tf.int32)
    background_label = total_labels - 1
    # Indices of every RoI whose best label is not the background label.
    keep_indices = tf.where(tf.not_equal(best_label_per_roi, background_label))
    num_kept = tf.shape(keep_indices)[0]

    kept_labels = tf.gather_nd(frcnn_label_pred, keep_indices)
    kept_label_ids = tf.gather_nd(best_label_per_roi, keep_indices)
    kept_rois = tf.gather_nd(roi_bboxes, keep_indices)
    kept_deltas = tf.reshape(
        tf.gather_nd(frcnn_delta_pred, keep_indices),
        (num_kept, total_labels, 4),
    )

    # Place each kept RoI in the slot of its predicted label; every other label slot stays zero.
    row_ids = tf.reshape(tf.range(num_kept), (-1, 1))
    label_ids = tf.reshape(kept_label_ids, (-1, 1))
    scatter_indices = tf.reshape(
        tf.concat([row_ids, label_ids], axis=1),
        (num_kept, 2),
    )
    rois_per_label = tf.scatter_nd(scatter_indices, kept_rois, (num_kept, total_labels, 4))

    pred_bboxes = helpers.get_bboxes_from_deltas(rois_per_label, kept_deltas)
    # Restore the leading batch axis on both outputs.
    return tf.expand_dims(pred_bboxes, 0), tf.expand_dims(kept_labels, 0)
def call(self, inputs):
    """Turn RPN outputs into RoI boxes via top-k pre-selection followed by NMS.

    inputs:
        inputs[0] = rpn_bbox_deltas, reshaped here to (batch_size, total_anchors, 4)
        inputs[1] = rpn_labels, reshaped here to (batch_size, total_anchors)

    outputs:
        roi_bboxes = first value returned by helpers.non_max_suppression,
            wrapped in tf.stop_gradient
            (presumably (batch_size, post_nms_topn, [y1, x1, y2, x2]) -- confirm against the helper)
    """
    raw_deltas = inputs[0]
    raw_scores = inputs[1]
    anchors = self.anchors

    pre_nms_topn = self.hyper_params["pre_nms_topn"]
    post_nms_topn = self.hyper_params["post_nms_topn"]
    nms_iou_threshold = self.hyper_params["nms_iou_threshold"]
    variances = self.hyper_params["variances"]

    # The anchor count comes from the static shape; the batch size is read at run time.
    total_anchors = anchors.shape[0]
    batch_size = tf.shape(raw_deltas)[0]
    deltas = tf.reshape(raw_deltas, (batch_size, total_anchors, 4))
    scores = tf.reshape(raw_scores, (batch_size, total_anchors))

    # Scale the deltas by the configured variances before decoding them against the anchors.
    deltas = deltas * variances
    decoded_bboxes = helpers.get_bboxes_from_deltas(anchors, deltas)

    # Keep only the pre_nms_topn highest-scoring anchors of each batch item.
    _, top_indices = tf.nn.top_k(scores, pre_nms_topn)
    top_bboxes = tf.gather(decoded_bboxes, top_indices, batch_dims=1)
    top_scores = tf.gather(scores, top_indices, batch_dims=1)

    # Add a singleton axis to both tensors before handing them to the NMS helper.
    top_bboxes = tf.reshape(top_bboxes, (batch_size, pre_nms_topn, 1, 4))
    top_scores = tf.reshape(top_scores, (batch_size, pre_nms_topn, 1))

    roi_bboxes, _, _, _ = helpers.non_max_suppression(
        top_bboxes,
        top_scores,
        max_output_size_per_class=post_nms_topn,
        max_total_size=post_nms_topn,
        iou_threshold=nms_iou_threshold,
    )
    return tf.stop_gradient(roi_bboxes)
# Batch the test split, padding every element with the configured shapes and values.
VOC_test_data = VOC_test_data.padded_batch(
    batch_size,
    padded_shapes=padded_shapes,
    padding_values=padding_values,
)

# VGG16 backbone without the classifier head; for stride 16 the last layer is dropped.
base_model = VGG16(include_top=False)
if hyper_params["stride"] == 16:
    base_model = Sequential(base_model.layers[:-1])
rpn_model = rpn.get_model(base_model, hyper_params)

# Weights come from either the Faster R-CNN checkpoint or the RPN checkpoint, matched by layer name.
frcnn_model_path = helpers.get_model_path("frcnn", hyper_params["stride"])
rpn_model_path = helpers.get_model_path("rpn", hyper_params["stride"])
if load_weights_from_frcnn:
    model_path = frcnn_model_path
else:
    model_path = rpn_model_path
rpn_model.load_weights(model_path, by_name=True)

for image_data in VOC_test_data:
    # gt_boxes and gt_labels are unpacked but not used in the rest of this loop.
    img, gt_boxes, gt_labels = image_data
    input_img, anchors = rpn.get_step_data(image_data, hyper_params, preprocess_input, mode="inference")
    rpn_bbox_deltas, rpn_labels = rpn_model.predict_on_batch(input_img)

    # NOTE: batch_size is rebound here from the shape of the anchors tensor.
    anchors_shape = tf.shape(anchors)
    batch_size = anchors_shape[0]
    anchor_row_size = anchors_shape[1]
    rpn_bbox_deltas = tf.reshape(rpn_bbox_deltas, (batch_size, anchor_row_size, 4))
    rpn_labels = tf.reshape(rpn_labels, (batch_size, anchor_row_size, 1))

    # Decode the deltas against the anchors and add a singleton axis before the coordinates for the NMS helper.
    rpn_bboxes = tf.reshape(
        helpers.get_bboxes_from_deltas(anchors, rpn_bbox_deltas),
        (batch_size, anchor_row_size, 1, 4),
    )

    nms_bboxes, _, _, _ = helpers.non_max_suppression(
        rpn_bboxes,
        rpn_labels,
        max_output_size_per_class=hyper_params["nms_topn"],
        max_total_size=hyper_params["nms_topn"],
    )

    # Convert the image to float32 and draw the surviving boxes on it.
    img_float32 = tf.image.convert_image_dtype(img, tf.float32)
    helpers.draw_bboxes(img_float32, nms_bboxes)