def _build(self, image, gt_boxes=None, is_training=False):
    if gt_boxes is not None:
        gt_boxes = tf.cast(gt_boxes, tf.float32)

    image.set_shape((None, None, 3))
    conv_feature_map = self.base_network(
        tf.expand_dims(image, 0), is_training=is_training
    )

    # The RPN submodule which generates proposals of objects.
    self._rpn = RPN(
        self._num_anchors, self._config.model.rpn,
        debug=self._debug, seed=self._seed
    )
    if self._with_rcnn:
        self._rcnn = RCNN(
            self._num_classes, self._config.model.rcnn,
            debug=self._debug, seed=self._seed
        )

    image_shape = tf.shape(image)[0:2]

    variable_summaries(conv_feature_map, 'conv_feature_map', 'reduced')

    all_anchors = self._generate_anchors(tf.shape(conv_feature_map))
    rpn_prediction = self._rpn(
        conv_feature_map, image_shape, all_anchors,
        gt_boxes=gt_boxes, is_training=is_training
    )

    prediction_dict = {
        'rpn_prediction': rpn_prediction,
    }

    if self._debug:
        prediction_dict['image'] = image
        prediction_dict['image_shape'] = image_shape
        prediction_dict['all_anchors'] = all_anchors
        prediction_dict['anchor_reference'] = tf.convert_to_tensor(
            self._anchor_reference
        )
        if gt_boxes is not None:
            prediction_dict['gt_boxes'] = gt_boxes
        prediction_dict['conv_feature_map'] = conv_feature_map

    if self._with_rcnn:
        proposals = tf.stop_gradient(rpn_prediction['proposals'])
        classification_pred = self._rcnn(
            conv_feature_map, proposals, image_shape, self.base_network,
            gt_boxes=gt_boxes, is_training=is_training
        )
        prediction_dict['classification_prediction'] = classification_pred

    return prediction_dict
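# A minimal numpy sketch of what `_generate_anchors` (whose body is not
# shown here) is assumed to do: tile an anchor reference over every
# feature-map position by shifting it with the network stride. The helper
# name and the stride default (16, matching the tests below) are
# assumptions, not taken from this file.
import numpy as np

def generate_all_anchors_sketch(anchor_reference, feature_shape, stride=16):
    """Tile `anchor_reference` (num_anchors, 4) over a feature map."""
    feat_h, feat_w = feature_shape
    shift_x = np.arange(feat_w) * stride
    shift_y = np.arange(feat_h) * stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # One (x1, y1, x2, y2) shift per feature-map location.
    shifts = np.stack(
        [shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel()],
        axis=1,
    )
    # Broadcast: (num_locations, 1, 4) + (1, num_anchors, 4).
    all_anchors = shifts[:, np.newaxis, :] + anchor_reference[np.newaxis, :, :]
    return all_anchors.reshape((-1, 4))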
def testFocalL1Loss(self):
    """Tests that focal loss (classification) and smooth L1 loss
    (regression) return reasonable values in simple cases.
    """
    config = self.config
    config["loss"] = {"type": "focal"}
    model = RPN(self.num_anchors, config, debug=True)

    # Define placeholders that are used inside the loss method.
    rpn_cls_prob = tf.placeholder(tf.float32)
    rpn_cls_target = tf.placeholder(tf.float32)
    rpn_cls_score = tf.placeholder(tf.float32)
    rpn_bbox_target = tf.placeholder(tf.float32)
    rpn_bbox_pred = tf.placeholder(tf.float32)

    loss = model.loss({
        "rpn_cls_prob": rpn_cls_prob,
        "rpn_cls_target": rpn_cls_target,
        "rpn_cls_score": rpn_cls_score,
        "rpn_bbox_target": rpn_bbox_target,
        "rpn_bbox_pred": rpn_bbox_pred,
    })

    # Test perfect score.
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        loss_dict = sess.run(
            loss,
            feed_dict={
                # Probability is (background_prob, foreground_prob).
                rpn_cls_prob: [[0, 1], [1.0, 0]],
                # Target: 1 is foreground, 0 is background.
                rpn_cls_target: [1, 0],
                # Class scores before applying softmax. Since we use
                # cross entropy, we need a big difference between values.
                rpn_cls_score: [[-100.0, 100.0], [100.0, -100.0]],
                # Targets and predictions are exactly equal.
                rpn_bbox_target: [[0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1]],
                rpn_bbox_pred: [[0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1]],
            },
        )

        # Assert close since cross-entropy could return a very small value.
        self.assertAllClose(tuple(loss_dict.values()), (0, 0))
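# A minimal numpy sketch of the classification term the test above
# exercises, assuming the standard focal loss formulation
# FL(p_t) = -(1 - p_t)^gamma * log(p_t) with gamma = 2; the exact variant
# and gamma used inside `model.loss` are not shown in this file.
import numpy as np

def focal_loss_sketch(cls_prob, cls_target, gamma=2.0, eps=1e-7):
    """cls_prob: (N, 2) softmax output; cls_target: (N,) in {0, 1}."""
    p_t = cls_prob[np.arange(len(cls_target)), cls_target]
    # The (1 - p_t)^gamma factor down-weights easy (confident) examples,
    # so the perfect predictions fed above produce a loss of ~0.
    return -((1.0 - p_t) ** gamma) * np.log(p_t + eps)

probs = np.array([[0.0, 1.0], [1.0, 0.0]])
targets = np.array([1, 0])
assert np.allclose(focal_loss_sketch(probs, targets), 0.0)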
def _build(self, image, gt_boxes=None, is_training=False):
    """
    Returns bounding boxes and classification probabilities.

    Args:
        image: A tensor with the image. Its shape should be
            `(height, width, 3)`.
        gt_boxes: A tensor with all the ground truth boxes of that image.
            Its shape should be `(num_gt_boxes, 5)`, where for each gt box
            we have (x1, y1, x2, y2, label), in that order.
        is_training: A boolean indicating whether this is being used for
            training.

    Returns:
        classification_prob: A tensor with the softmax probability for
            each of the bounding boxes found in the image.
            Its shape should be: (num_bboxes, num_categories + 1)
        classification_bbox: A tensor with the bounding boxes found.
            Its shape should be: (num_bboxes, 4). For each of the bboxes
            we have (x1, y1, x2, y2).
    """
    if gt_boxes is not None:
        gt_boxes = tf.cast(gt_boxes, tf.float32)

    # A tensor with the feature map for the image; its shape should be
    # `(feature_height, feature_width, 512)`. The shape depends on the
    # pretrained network in use.
    # Set rank and last dimension before using the base network.
    # TODO: Why does it lose information when using a queue?
    image.set_shape((None, None, 3))
    conv_feature_map = self.base_network(
        tf.expand_dims(image, 0), is_training=is_training
    )

    # The RPN submodule, which generates proposals of objects.
    self._rpn = RPN(
        self._num_anchors, self._config.model.rpn,
        debug=self._debug, seed=self._seed,
    )
    if self._with_rcnn:
        # The RCNN submodule, which takes RPN's proposals and classifies
        # them as background or a specific class.
        self._rcnn = RCNN(
            self._num_classes, self._config.model.rcnn,
            debug=self._debug, seed=self._seed,
        )

    image_shape = tf.shape(image)[0:2]

    variable_summaries(conv_feature_map, "conv_feature_map", "reduced")

    # Generate anchors for the image based on the anchor reference.
    all_anchors = self._generate_anchors(tf.shape(conv_feature_map))
    rpn_prediction = self._rpn(
        conv_feature_map, image_shape, all_anchors,
        gt_boxes=gt_boxes, is_training=is_training,
    )

    prediction_dict = {
        "rpn_prediction": rpn_prediction,
    }

    if self._debug:
        prediction_dict["image"] = image
        prediction_dict["image_shape"] = image_shape
        prediction_dict["all_anchors"] = all_anchors
        prediction_dict["anchor_reference"] = tf.convert_to_tensor(
            self._anchor_reference)
        if gt_boxes is not None:
            prediction_dict["gt_boxes"] = gt_boxes
        prediction_dict["conv_feature_map"] = conv_feature_map

    if self._with_rcnn:
        proposals = tf.stop_gradient(rpn_prediction["proposals"])
        classification_pred = self._rcnn(
            conv_feature_map, proposals, image_shape, self.base_network,
            gt_boxes=gt_boxes, is_training=is_training,
        )
        prediction_dict["classification_prediction"] = classification_pred

    return prediction_dict
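# A runnable TF1 sketch of the rank/shape bookkeeping done at the top of
# `_build` (relevant to the TODO above): a tensor coming from a queue may
# lose its static shape, so set_shape pins the rank and channel count
# before the base network, and expand_dims adds the batch dimension the
# pretrained network expects. Values here are illustrative only.
import tensorflow as tf

image = tf.placeholder(tf.float32)   # unknown rank, as if from a queue
print(image.get_shape())             # <unknown>
image.set_shape((None, None, 3))     # pin rank 3 and the channel count
print(image.get_shape())             # (?, ?, 3)
batched = tf.expand_dims(image, 0)   # add the batch dimension
print(batched.get_shape())           # (1, ?, ?, 3)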
def testBasic(self):
    """Tests that shapes are consistent with anchor generation.
    """
    model = RPN(self.num_anchors, self.config, debug=True)
    # (plus the batch number)
    pretrained_output_shape = (1, 32, 32, 512)
    pretrained_output = tf.placeholder(
        tf.float32, shape=pretrained_output_shape)

    # Estimate image shape from the pretrained output and the anchor
    # stride.
    image_shape_val = (
        int(pretrained_output_shape[1] * self.stride),
        int(pretrained_output_shape[2] * self.stride),
    )

    # Use 4 ground truth boxes.
    gt_boxes_shape = (4, 4)
    gt_boxes = tf.placeholder(tf.float32, shape=gt_boxes_shape)
    image_shape_shape = (2,)
    image_shape = tf.placeholder(tf.float32, shape=image_shape_shape)

    # Total anchors depends on the pretrained output shape and the total
    # number of anchors per point.
    total_anchors = (
        pretrained_output_shape[1] * pretrained_output_shape[2] *
        self.num_anchors
    )
    all_anchors_shape = (total_anchors, 4)
    all_anchors = tf.placeholder(tf.float32, shape=all_anchors_shape)

    layers = model(
        pretrained_output, image_shape, all_anchors, gt_boxes=gt_boxes)

    with self.test_session() as sess:
        # As in the case of a real session, we need to initialize the
        # variables.
        sess.run(tf.global_variables_initializer())
        layers_inst = sess.run(layers, feed_dict={
            # We don't really care about the value of the pretrained
            # output, only that it has the correct shape.
            pretrained_output: np.random.rand(*pretrained_output_shape),
            # Generate random but valid ground truth boxes.
            gt_boxes: generate_gt_boxes(gt_boxes_shape[0], image_shape_val),
            # Generate anchors from a reference and the shape of the
            # pretrained_output.
            all_anchors: generate_anchors(
                generate_anchors_reference(
                    self.base_size, self.ratios, self.scales),
                16, pretrained_output_shape[1:3]),
            image_shape: image_shape_val,
        })

    # Class score generates 2 values per anchor.
    rpn_cls_score_shape = layers_inst['rpn_cls_score'].shape
    rpn_cls_score_true_shape = (total_anchors, 2)
    self.assertEqual(rpn_cls_score_shape, rpn_cls_score_true_shape)

    # Probs have the same shape as cls scores.
    rpn_cls_prob_shape = layers_inst['rpn_cls_prob'].shape
    self.assertEqual(rpn_cls_prob_shape, rpn_cls_score_true_shape)

    # We check the softmax by summing the output.
    rpn_cls_prob_sum = layers_inst['rpn_cls_prob'].sum(axis=1)
    self.assertAllClose(rpn_cls_prob_sum, np.ones(total_anchors))

    # Proposals and scores are the output of the NMS with limits.
    total_proposals = layers_inst['proposals'].shape[0]
    total_scores = layers_inst['scores'].shape[0]

    # Check we don't get more than post_nms_top_n proposals.
    self.assertGreaterEqual(
        self.config.proposals.post_nms_top_n, total_proposals)

    # Check we get a score for each proposal.
    self.assertEqual(total_proposals, total_scores)

    # Check that we get a regression for each anchor.
    self.assertEqual(
        layers_inst['rpn_bbox_pred'].shape, (total_anchors, 4))

    # Check that we get a regression target for each anchor.
    self.assertEqual(
        layers_inst['rpn_bbox_target'].shape, (total_anchors, 4))

    # Check that we get a target class for each anchor.
    self.assertEqual(
        layers_inst['rpn_cls_target'].shape, (total_anchors,))

    # Check that targets are composed of [-1, 0, 1] only.
    rpn_cls_target = layers_inst['rpn_cls_target']
    self.assertEqual(
        tuple(np.sort(np.unique(rpn_cls_target))), (-1, 0., 1.))

    batch_cls_target = rpn_cls_target[
        (rpn_cls_target == 0.) | (rpn_cls_target == 1.)]

    # Check that the non-negative target classes are exactly the size of
    # the minibatch.
    self.assertEqual(
        batch_cls_target.shape, (self.config.target.minibatch_size,))

    # Check that we get up to foreground_fraction positive anchors.
    self.assertLessEqual(
        batch_cls_target[batch_cls_target == 1.].shape[0] /
        batch_cls_target.shape[0],
        self.config.target.foreground_fraction)
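# A minimal numpy sketch of the `generate_gt_boxes` helper the tests rely
# on (its real implementation lives elsewhere in the test utilities): it
# only needs to return `num_boxes` well-formed (x1, y1, x2, y2) boxes that
# fit inside `image_shape`. The exact sampling scheme is an assumption.
import numpy as np

def generate_gt_boxes_sketch(num_boxes, image_shape):
    height, width = image_shape
    x1 = np.random.uniform(0.0, width - 2.0, size=num_boxes)
    y1 = np.random.uniform(0.0, height - 2.0, size=num_boxes)
    # Ensure x2 > x1 and y2 > y1 so every box has positive area.
    x2 = np.random.uniform(x1 + 1.0, width - 1.0)
    y2 = np.random.uniform(y1 + 1.0, height - 1.0)
    return np.stack([x1, y1, x2, y2], axis=1)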
def testTypes(self):
    """Tests that return types are the expected ones.
    """
    # We repeat testBasic's setup.
    model = RPN(self.num_anchors, self.config, debug=True)
    pretrained_output_shape = (1, 32, 32, 512)
    pretrained_output = tf.placeholder(
        tf.float32, shape=pretrained_output_shape)
    image_shape_val = (
        int(pretrained_output_shape[1] * self.stride),
        int(pretrained_output_shape[2] * self.stride),
    )
    gt_boxes_shape = (4, 4)
    gt_boxes = tf.placeholder(tf.float32, shape=gt_boxes_shape)
    image_shape_shape = (2,)
    image_shape = tf.placeholder(tf.float32, shape=image_shape_shape)
    total_anchors = (
        pretrained_output_shape[1] * pretrained_output_shape[2] *
        self.num_anchors
    )
    all_anchors_shape = (total_anchors, 4)
    all_anchors = tf.placeholder(tf.float32, shape=all_anchors_shape)

    layers = model(
        pretrained_output, image_shape, all_anchors, gt_boxes=gt_boxes)

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        layers_inst = sess.run(layers, feed_dict={
            pretrained_output: np.random.rand(*pretrained_output_shape),
            gt_boxes: generate_gt_boxes(gt_boxes_shape[0], image_shape_val),
            all_anchors: generate_anchors(
                generate_anchors_reference(
                    self.base_size, self.ratios, self.scales),
                16, pretrained_output_shape[1:3]),
            image_shape: image_shape_val,
        })

    # Assertions
    proposals = layers_inst['proposals']
    scores = layers_inst['scores']
    rpn_cls_prob = layers_inst['rpn_cls_prob']
    rpn_cls_score = layers_inst['rpn_cls_score']
    rpn_bbox_pred = layers_inst['rpn_bbox_pred']
    rpn_cls_target = layers_inst['rpn_cls_target']
    rpn_bbox_target = layers_inst['rpn_bbox_target']

    # Everything should have dtype=tf.float32.
    self.assertAllEqual(
        # We have 7 values we want to compare to tf.float32.
        [tf.float32] * 7,
        [
            proposals.dtype, scores.dtype, rpn_cls_prob.dtype,
            rpn_cls_score.dtype, rpn_bbox_pred.dtype,
            rpn_cls_target.dtype, rpn_bbox_target.dtype,
        ]
    )
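# A numpy sketch of the smooth L1 regression term that pairs with the
# classification loss in testFocalL1Loss above, assuming the standard
# Fast R-CNN definition with sigma = 1. Identical predictions and targets
# (as fed in that test) give exactly 0.
import numpy as np

def smooth_l1_sketch(bbox_pred, bbox_target, sigma=1.0):
    sigma2 = sigma ** 2
    diff = np.abs(bbox_pred - bbox_target)
    # Quadratic near zero, linear past the 1/sigma^2 threshold.
    per_coord = np.where(
        diff < 1.0 / sigma2,
        0.5 * sigma2 * diff ** 2,
        diff - 0.5 / sigma2,
    )
    return per_coord.sum(axis=-1)

pred = np.full((2, 4), 0.1)
assert np.allclose(smooth_l1_sketch(pred, pred), 0.0)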
def _build(self, image, gt_boxes=None, is_training=False):
    """
    Returns bounding boxes and classification probabilities.

    Args:
        image: A tensor with the image. Its shape should be
            `(height, width, 3)`.
        gt_boxes: A tensor with all the ground truth boxes of that image.
            Its shape should be `(num_gt_boxes, 5)`, where for each gt box
            we have (x1, y1, x2, y2, label), in that order.
        is_training: A boolean indicating whether this is being used for
            training.

    Returns:
        A list whose first element is the BodyDetector prediction dict
        (with the RPN and classification predictions), followed by one
        PartDetector prediction dict per decoded main-part box.
    """
    # Use variable_scope to split BodyDetector and PartDetector.
    if gt_boxes is not None:
        gt_boxes = tf.cast(gt_boxes, tf.float32)

    # A tensor with the feature map for the image; its shape should be
    # `(feature_height, feature_width, 512)`. The shape depends on the
    # pretrained network in use.
    # Set rank and last dimension before using the base network.
    # TODO: Why does it lose information when using a queue?
    image.set_shape((None, None, 3))
    conv_feature_map = self.base_network(
        tf.expand_dims(image, 0), is_training=is_training
    )

    C4 = conv_feature_map
    with tf.variable_scope("C5"):
        C5 = self.iter_unify_layer(C4, is_training=is_training)
        # C5 = self.unify_layer(C4, is_training=is_training)
    with tf.variable_scope("Head_body_part"):
        Head_body_part = self.iter_unify_layer(C5, is_training=is_training)
        # Head_body_part = self.unify_layer(C5, is_training=is_training)
    with tf.variable_scope("Head_hf_part"):
        Head_hf_part = self.iter_unify_layer(C5, is_training=is_training)
        # Head_hf_part = self.unify_layer(C5, is_training=is_training)
    with tf.variable_scope("Head_hf_part_conv"):
        Head_hf_part_conv = self.iter_unify_layer(
            Head_hf_part, is_training=is_training
        )

    # The RPN submodule which generates proposals of objects.
    self._rpn = RPN(
        self._num_anchors, self._config.model.rpn,
        debug=self._debug, seed=self._seed
    )
    if self._with_rcnn:
        # The RCNN submodule, which takes RPN's proposals and classifies
        # them as background or a specific class.
        self._rcnn = RCNN(
            self._num_classes, self._config.model.rcnn,
            debug=self._debug, seed=self._seed, name="__rcnn__1"
        )

    image_shape = tf.shape(image)[0:2]

    variable_summaries(conv_feature_map, 'conv_feature_map', 'reduced')

    # Generate anchors for the image based on the anchor reference.
    all_anchors_1 = self._generate_anchors(tf.shape(conv_feature_map))
    rpn_1_prediction = self._rpn(
        conv_feature_map, image_shape, all_anchors_1,
        gt_boxes=gt_boxes, is_training=is_training
    )

    prediction_1_dict = {
        'rpn_prediction': rpn_1_prediction,
    }

    if self._debug:
        prediction_1_dict['image'] = image
        prediction_1_dict['image_shape'] = image_shape
        prediction_1_dict['all_anchors'] = all_anchors_1
        prediction_1_dict['anchor_reference'] = tf.convert_to_tensor(
            self._anchor_reference
        )
        if gt_boxes is not None:
            prediction_1_dict['gt_boxes'] = gt_boxes
        prediction_1_dict['conv_feature_map'] = conv_feature_map

    if self._with_rcnn:
        proposals = tf.stop_gradient(rpn_1_prediction['proposals'])
        rpn_1_proposals = proposals
        classification_pred = self._rcnn(
            Head_body_part, proposals, image_shape, self.base_network,
            gt_boxes=gt_boxes, is_training=is_training
        )

        # Retrieve what we need from classification_pred.
        without_filter_dict = classification_pred["without_filter_dict"]
        objects_1_all = without_filter_dict["objects"]
        labels_1_all = without_filter_dict["proposal_label"]
        probs_1_all = without_filter_dict["proposal_label_prob"]
        objects_1 = classification_pred["objects"]
        labels_1 = classification_pred["labels"]
        probs_1 = classification_pred["probs"]
        prediction_1_dict['objects'] = objects_1
        prediction_1_dict['labels'] = labels_1
        prediction_1_dict['probs'] = probs_1

        # Sort so main-part boxes come first (the indicator term adds 1 to
        # their score), then by probability.
        top_indices = tf.nn.top_k(
            tf.cast(
                1 - tf.sign(tf.abs(labels_1_all - self._main_part_label)),
                dtype=tf.float32
            ) + probs_1_all,
            k=tf.shape(labels_1_all)[0]
        ).indices
        objects_1_sorted = tf.gather(objects_1_all, top_indices)
        filter_num = tf.minimum(tf.shape(objects_1_sorted)[0], 7)
        objects_1_filtered = tf.slice(
            objects_1_sorted, begin=[0, 0], size=[filter_num, 4]
        )
        # Expand with label: [?, 4] -> [?, 5].
        objects_1_filtered = tf.concat([
            objects_1_filtered,
            tf.fill(
                [tf.shape(objects_1_filtered)[0], 1],
                value=tf.convert_to_tensor(
                    self._main_part_label, dtype=tf.float32
                )
            )
        ], axis=-1)

        prediction_1_dict['classification_prediction'] = classification_pred

    if gt_boxes is not None:
        body_feature_ground_truth = self.generate_PartDetector_features(
            input_image=image,
            input_feature=Head_hf_part,
            gt_boxes=gt_boxes,
            only_main_part_boxes=False
        )
        # Keep the non-main-part ground truth boxes and append the
        # filtered main-part predictions.
        body_feature_pred = self.generate_PartDetector_features(
            input_image=image,
            input_feature=Head_hf_part,
            gt_boxes=tf.concat([
                tf.gather(
                    gt_boxes,
                    tf.reshape(
                        tf.where(tf.not_equal(
                            gt_boxes[:, -1], self._main_part_label
                        )),
                        [-1]
                    )
                ),
                objects_1_filtered
            ], axis=0),
            only_main_part_boxes=False
        )
    else:
        body_feature_ground_truth = None
        body_feature_pred = self.generate_PartDetector_features(
            input_image=image,
            input_feature=Head_hf_part,
            gt_boxes=objects_1_filtered,
            only_main_part_boxes=True
        )

    # Used as a fake placeholder.
    if gt_boxes is not None:
        body_feature_pred = tf.reshape(
            body_feature_pred, [-1, tf.shape(body_feature_ground_truth)[-1]]
        )
    else:
        body_feature_pred = tf.reshape(body_feature_pred, [-1, 147461])

    # Unstack it along the first dim and "map reduce" it over the modified
    # Faster R-CNN; the input ground truth labels should be remapped in
    # the "decoder" of a single feature.
    fixed_sliced_size, PartDetector_feature_stacked = \
        self.padding_and_slice_PartDetector_features(
            body_pred_feature=body_feature_pred,
            body_ground_truth_feature=body_feature_ground_truth
        )
    PartDetector_feature_stacked = tf.slice(
        PartDetector_feature_stacked,
        begin=[0, 0], size=[fixed_sliced_size, -1]
    )
    if gt_boxes is not None:
        PartDetector_feature_stacked = tf.gather(
            PartDetector_feature_stacked,
            tf.random_shuffle(tf.range(fixed_sliced_size))
        )
        PartDetector_feature_stacked = tf.reshape(
            PartDetector_feature_stacked, [fixed_sliced_size, -1]
        )
        PartDetector_feature_unstacked = [PartDetector_feature_stacked[0, ...]]
    else:
        PartDetector_feature_unstacked = tf.unstack(
            PartDetector_feature_stacked, axis=0
        )

    partdetector_dict_list = []
    for single_partdetector_feature in PartDetector_feature_unstacked:
        if gt_boxes is not None:
            main_part_ori_bbox, cropped_feature, cropped_bboxes = \
                self.decode_single_unstacked_feature(
                    input_feature=single_partdetector_feature,
                    only_main_part_boxes=False
                )
        else:
            main_part_ori_bbox, cropped_feature = \
                self.decode_single_unstacked_feature(
                    input_feature=single_partdetector_feature,
                    only_main_part_boxes=True
                )
            cropped_bboxes = None

        x1, y1, x2, y2, _ = tf.split(main_part_ori_bbox, 5)
        x1, y1, x2, y2 = map(
            lambda t: tf.cast(tf.reshape(t, []), tf.int32),
            [x1, y1, x2, y2]
        )
        cropped_image = tf.image.crop_to_bounding_box(
            image=image,
            offset_height=y1, offset_width=x1,
            target_height=y2 - y1 + 1, target_width=x2 - x1 + 1
        )
        cropped_feature = tf.expand_dims(cropped_feature, 0)

        input_feature = Head_hf_part_conv
        image_h, image_w = tf.split(
            tf.shape(image)[0:2], num_or_size_splits=2)
        feature_h, feature_w = tf.split(
            tf.shape(input_feature)[1:3], num_or_size_splits=2)

        def to_feature_coord(v, image_dim, feature_dim):
            # Map a pixel coordinate into feature-map coordinates.
            return tf.reshape(
                tf.cast(
                    tf.cast(v, tf.float32) / tf.cast(image_dim, tf.float32)
                    * tf.cast(feature_dim, tf.float32),
                    tf.int32
                ),
                []
            )

        # NOTE: slice into a loop-local tensor so later iterations still
        # crop from the original Head_hf_part_conv map.
        head_hf_part_conv_cropped = tf.slice(
            input_feature,
            begin=[
                0,
                to_feature_coord(y1, image_h, feature_h),
                to_feature_coord(x1, image_w, feature_w),
                0
            ],
            size=[
                -1,
                to_feature_coord(y2 - y1, image_h, feature_h),
                to_feature_coord(x2 - x1, image_w, feature_w),
                256
            ]
        )

        # Head_hf_part_conv not cropped: test the efficiency.
        partdetector_dict = self.partdetetor(
            conv_feature_map=cropped_feature,
            Head_hf_part_conv=head_hf_part_conv_cropped,
            image=cropped_image,
            gt_boxes=cropped_bboxes,
            is_training=is_training
        )
        partdetector_dict["main_info"] = {
            "image": image,
            "main_part_ori_bbox": main_part_ori_bbox
        }
        partdetector_dict_list.append(partdetector_dict)

    return [prediction_1_dict] + partdetector_dict_list
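# A plain-Python sketch of the image-to-feature coordinate mapping that
# the tf.slice in the loop above performs: pixel coordinates are scaled by
# feature_size / image_size and truncated to ints. The helper name and
# truncation policy are assumptions for illustration.
def image_box_to_feature_slice_sketch(box, image_hw, feature_hw):
    x1, y1, x2, y2 = box
    image_h, image_w = image_hw
    feature_h, feature_w = feature_hw
    begin_y = int(y1 / image_h * feature_h)
    begin_x = int(x1 / image_w * feature_w)
    size_y = int((y2 - y1) / image_h * feature_h)
    size_x = int((x2 - x1) / image_w * feature_w)
    # Matches tf.slice(input, begin=[0, begin_y, begin_x, 0],
    #                  size=[-1, size_y, size_x, 256]) above.
    return (begin_y, begin_x), (size_y, size_x)

# Example: a 160x160-pixel box on a 512x512 image with a 32x32 feature
# map maps to a 10x10 feature window.
assert image_box_to_feature_slice_sketch(
    (64, 64, 224, 224), (512, 512), (32, 32)) == ((4, 4), (10, 10))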
def _build(self, image, gt_boxes=None, is_training=True):
    """
    Returns bounding boxes and classification probabilities.

    Args:
        image: A tensor with the image. Its shape should be
            `(1, height, width, 3)`.
        gt_boxes: A tensor with all the ground truth boxes of that image.
            Its shape should be `(num_gt_boxes, 5)`, where for each gt box
            we have (x1, y1, x2, y2, label), in that order.
        is_training: A boolean indicating whether this is being used for
            training.

    Returns:
        classification_prob: A tensor with the softmax probability for
            each of the bounding boxes found in the image.
            Its shape should be: (num_bboxes, num_categories + 1)
        classification_bbox: A tensor with the bounding boxes found.
            Its shape should be: (num_bboxes, 4). For each of the bboxes
            we have (x1, y1, x2, y2).
    """
    if gt_boxes is not None:
        gt_boxes = tf.cast(gt_boxes, tf.float32)

    # A tensor with the feature map for the image; its shape should be
    # `(feature_height, feature_width, 512)`. The shape depends on the
    # pretrained network in use.
    conv_feature_map = self.base_network(image, is_training=is_training)

    # The RPN submodule which generates proposals of objects.
    self._rpn = RPN(self._num_anchors, self._config.rpn,
                    debug=self._debug, seed=self._seed)

    if self._with_rcnn:
        # The RCNN submodule, which takes RPN's proposals and classifies
        # them as background or a specific class.
        self._rcnn = RCNN(self._num_classes, self._config.rcnn,
                          debug=self._debug, seed=self._seed)

    image_shape = tf.shape(image)[1:3]

    variable_summaries(conv_feature_map, 'conv_feature_map', ['rpn'])

    # Generate anchors for the image based on the anchor reference.
    all_anchors = self._generate_anchors(tf.shape(conv_feature_map))
    rpn_prediction = self._rpn(
        conv_feature_map, image_shape, all_anchors, gt_boxes=gt_boxes)

    prediction_dict = {
        'rpn_prediction': rpn_prediction,
    }

    if self._debug:
        prediction_dict['image'] = image
        prediction_dict['image_shape'] = image_shape
        prediction_dict['all_anchors'] = all_anchors
        prediction_dict['anchor_reference'] = tf.convert_to_tensor(
            self._anchor_reference)
        prediction_dict['gt_boxes'] = gt_boxes
        prediction_dict['conv_feature_map'] = conv_feature_map

    if self._with_rcnn:
        classification_pred = self._rcnn(
            conv_feature_map, rpn_prediction['proposals'], image_shape,
            gt_boxes=gt_boxes, is_training=is_training)
        prediction_dict['classification_prediction'] = classification_pred

    return prediction_dict
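# The variants above differ in whether the RPN proposals are wrapped in
# tf.stop_gradient before feeding the RCNN (this one is not). A minimal
# runnable TF1 sketch of what that changes, with illustrative values:
# gradients flow to the variable only through the non-stopped path.
import tensorflow as tf

w = tf.Variable(2.0)
y = w * 3.0
z = tf.stop_gradient(y) + y  # gradient flows only through the second term
grad = tf.gradients(z, w)[0]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(grad))  # 3.0, not 6.0: the stopped branch contributes nothing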
def valid_conclusion(self, image, gt_boxes=None, is_training=False):
    # NOTE: the original fragment only took `gt_boxes` and referenced
    # `self`, `image` and `is_training` without defining them; they are
    # added as parameters here so the function is self-consistent.
    if gt_boxes is not None:
        gt_boxes = tf.cast(gt_boxes, tf.float32)

    # A tensor with the feature map for the image; its shape should be
    # `(feature_height, feature_width, 512)`. The shape depends on the
    # pretrained network in use.
    # Set rank and last dimension before using the base network.
    # TODO: Why does it lose information when using a queue?
    image.set_shape((None, None, 3))
    conv_feature_map = self.base_network(
        tf.expand_dims(image, 0), is_training=is_training
    )

    # The RPN submodule which generates proposals of objects.
    self._rpn = RPN(
        self._num_anchors, self._config.model.rpn,
        debug=self._debug, seed=self._seed
    )
    if self._with_rcnn:
        # The RCNN submodule, which takes RPN's proposals and classifies
        # them as background or a specific class.
        self._rcnn = RCNN(
            self._num_classes, self._config.model.rcnn,
            debug=self._debug, seed=self._seed
        )

    image_shape = tf.shape(image)[0:2]

    variable_summaries(conv_feature_map, 'conv_feature_map', 'reduced')

    # Generate anchors for the image based on the anchor reference.
    all_anchors = self._generate_anchors(tf.shape(conv_feature_map))
    rpn_prediction = self._rpn(
        conv_feature_map, image_shape, all_anchors,
        gt_boxes=gt_boxes, is_training=is_training
    )

    prediction_dict = {}
    prediction_dict["debug"] = (image, gt_boxes)
    prediction_dict["rpn_prediction"] = rpn_prediction

    if self._debug:
        prediction_dict['image'] = image
        prediction_dict['image_shape'] = image_shape
        prediction_dict['all_anchors'] = all_anchors
        prediction_dict['anchor_reference'] = tf.convert_to_tensor(
            self._anchor_reference
        )
        if gt_boxes is not None:
            prediction_dict['gt_boxes'] = gt_boxes
        prediction_dict['conv_feature_map'] = conv_feature_map

    if self._with_rcnn:
        proposals = tf.stop_gradient(rpn_prediction['proposals'])
        # NOTE: the original referenced an undefined `Head_hf_part_conv`
        # here; `conv_feature_map` is assumed to be the intended input.
        classification_pred = self._rcnn(
            conv_feature_map, proposals, image_shape, self.base_network,
            gt_boxes=gt_boxes, is_training=is_training
        )
        prediction_dict['classification_prediction'] = classification_pred

    return prediction_dict