def _build(self, image, gt_boxes=None, is_training=False):

        if gt_boxes is not None:
            gt_boxes = tf.cast(gt_boxes, tf.float32)

        image.set_shape((None, None, 3))

        conv_feature_map = self.base_network(
            tf.expand_dims(image, 0), is_training=is_training
        )

        # The RPN submodule which generates proposals of objects.
        self._rpn = RPN(
            self._num_anchors, self._config.model.rpn,
            debug=self._debug, seed=self._seed
        )
        if self._with_rcnn:
            self._rcnn = RCNN(
                self._num_classes, self._config.model.rcnn,
                debug=self._debug, seed=self._seed
            )

        image_shape = tf.shape(image)[0:2]

        variable_summaries(
            conv_feature_map, 'conv_feature_map', 'reduced'
        )

        all_anchors = self._generate_anchors(tf.shape(conv_feature_map))
        rpn_prediction = self._rpn(
            conv_feature_map, image_shape, all_anchors,
            gt_boxes=gt_boxes, is_training=is_training
        )

        prediction_dict = {
            'rpn_prediction': rpn_prediction,
        }

        if self._debug:
            prediction_dict['image'] = image
            prediction_dict['image_shape'] = image_shape
            prediction_dict['all_anchors'] = all_anchors
            prediction_dict['anchor_reference'] = tf.convert_to_tensor(
                self._anchor_reference
            )
            if gt_boxes is not None:
                prediction_dict['gt_boxes'] = gt_boxes
            prediction_dict['conv_feature_map'] = conv_feature_map

        if self._with_rcnn:
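            # Stop the gradient here so the RCNN loss does not backpropagate
            # into the proposal coordinates; the RPN is trained only through
            # its own losses.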
            proposals = tf.stop_gradient(rpn_prediction['proposals'])
            classification_pred = self._rcnn(
                conv_feature_map, proposals,
                image_shape, self.base_network,
                gt_boxes=gt_boxes, is_training=is_training
            )

            prediction_dict['classification_prediction'] = classification_pred

        return prediction_dict
Example #2
    def testFocalL1Loss(self):
        """Tests that focal & smooth l1 for classification, regression
        loss respectively returns reasonable values in simple cases.
        """
        config = self.config
        config["loss"] = {"type": "focal"}
        model = RPN(self.num_anchors, config, debug=True)

        # Define placeholders that are used inside the loss method.
        rpn_cls_prob = tf.placeholder(tf.float32)
        rpn_cls_target = tf.placeholder(tf.float32)
        rpn_cls_score = tf.placeholder(tf.float32)
        rpn_bbox_target = tf.placeholder(tf.float32)
        rpn_bbox_pred = tf.placeholder(tf.float32)

        loss = model.loss({
            "rpn_cls_prob": rpn_cls_prob,
            "rpn_cls_target": rpn_cls_target,
            "rpn_cls_score": rpn_cls_score,
            "rpn_bbox_target": rpn_bbox_target,
            "rpn_bbox_pred": rpn_bbox_pred,
        })

        # Test perfect score.
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            loss_dict = sess.run(
                loss,
                feed_dict={
                    # Probability is (background_prob, foreground_prob)
                    rpn_cls_prob: [[0, 1], [1.0, 0]],
                    # Target: 1 being foreground, 0 being background.
                    rpn_cls_target: [1, 0],
                    # Class scores before applying softmax. Since using cross
                    # entropy, we need a big difference between values.
                    rpn_cls_score: [[-100.0, 100.0], [100.0, -100.0]],
                    # Targets and predictions are exactly equal.
                    rpn_bbox_target: [[0.1, 0.1, 0.1, 0.1],
                                      [0.1, 0.1, 0.1, 0.1]],
                    rpn_bbox_pred: [[0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1,
                                                           0.1]],
                },
            )

            # Assert close since cross-entropy could return a very small value.
            self.assertAllClose(tuple(loss_dict.values()), (0, 0))
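
For reference, a minimal NumPy sketch of the two losses this test exercises. The `gamma` and `sigma` values and the class-column layout are assumptions; the real `model.loss` may differ in reduction and weighting.

import numpy as np

def focal_loss_sketch(cls_prob, cls_target, gamma=2.0):
    """FL(p_t) = -(1 - p_t)^gamma * log(p_t), summed over anchors."""
    # Probability assigned to the true class of each anchor
    # (column 0 = background, column 1 = foreground).
    p_t = np.where(cls_target == 1, cls_prob[:, 1], cls_prob[:, 0])
    p_t = np.clip(p_t, 1e-8, 1.0)
    return np.sum(-((1.0 - p_t) ** gamma) * np.log(p_t))

def smooth_l1_sketch(bbox_pred, bbox_target, sigma=3.0):
    """Huber-style loss: quadratic near zero, linear in the tails."""
    diff = np.abs(bbox_pred - bbox_target)
    cutoff = 1.0 / sigma ** 2
    per_coord = np.where(diff < cutoff,
                         0.5 * (sigma * diff) ** 2,
                         diff - 0.5 / sigma ** 2)
    return np.sum(per_coord)

# With the test's perfect inputs, both losses are ~0.
print(focal_loss_sketch(np.array([[0.0, 1.0], [1.0, 0.0]]), np.array([1, 0])))
print(smooth_l1_sketch(np.full((2, 4), 0.1), np.full((2, 4), 0.1)))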
Example #3
    def _build(self, image, gt_boxes=None, is_training=False):
        """
        Returns bounding boxes and classification probabilities.

        Args:
            image: A tensor with the image.
                Its shape should be `(height, width, 3)`.
            gt_boxes: A tensor with all the ground truth boxes of that image.
                Its shape should be `(num_gt_boxes, 5)`
                Where for each gt box we have (x1, y1, x2, y2, label),
                in that order.
            is_training: A boolean indicating whether the model is being
                trained.

        Returns:
            classification_prob: A tensor with the softmax probability for
                each of the bounding boxes found in the image.
                Its shape should be: (num_bboxes, num_categories + 1)
            classification_bbox: A tensor with the bounding boxes found.
                Its shape should be: (num_bboxes, 4). For each of the bboxes
                we have (x1, y1, x2, y2)
        """
        if gt_boxes is not None:
            gt_boxes = tf.cast(gt_boxes, tf.float32)
        # A Tensor with the feature map for the image,
        # its shape should be `(feature_height, feature_width, 512)`.
        # The shape depends on the pretrained network in use.

        # Set rank and last dimension before using base network
        # TODO: Why does it lose information when using a queue?
        image.set_shape((None, None, 3))

        conv_feature_map = self.base_network(tf.expand_dims(image, 0),
                                             is_training=is_training)

        # The RPN submodule which generates proposals of objects.
        self._rpn = RPN(
            self._num_anchors,
            self._config.model.rpn,
            debug=self._debug,
            seed=self._seed,
        )
        if self._with_rcnn:
            # The RCNN submodule, which classifies the RPN's proposals
            # as background or a specific class.
            self._rcnn = RCNN(
                self._num_classes,
                self._config.model.rcnn,
                debug=self._debug,
                seed=self._seed,
            )

        image_shape = tf.shape(image)[0:2]

        variable_summaries(conv_feature_map, "conv_feature_map", "reduced")

        # Generate anchors for the image based on the anchor reference.
        all_anchors = self._generate_anchors(tf.shape(conv_feature_map))
        rpn_prediction = self._rpn(
            conv_feature_map,
            image_shape,
            all_anchors,
            gt_boxes=gt_boxes,
            is_training=is_training,
        )

        prediction_dict = {
            "rpn_prediction": rpn_prediction,
        }

        if self._debug:
            prediction_dict["image"] = image
            prediction_dict["image_shape"] = image_shape
            prediction_dict["all_anchors"] = all_anchors
            prediction_dict["anchor_reference"] = tf.convert_to_tensor(
                self._anchor_reference)
            if gt_boxes is not None:
                prediction_dict["gt_boxes"] = gt_boxes
            prediction_dict["conv_feature_map"] = conv_feature_map

        if self._with_rcnn:
            proposals = tf.stop_gradient(rpn_prediction["proposals"])
            classification_pred = self._rcnn(
                conv_feature_map,
                proposals,
                image_shape,
                self.base_network,
                gt_boxes=gt_boxes,
                is_training=is_training,
            )

            prediction_dict["classification_prediction"] = classification_pred

        return prediction_dict
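
The tests below build all anchors by shifting an anchor reference over the feature-map grid. A minimal NumPy sketch of that idea, assuming the test helpers behave like the classic Faster R-CNN anchor generation (the function name here is hypothetical):

import numpy as np

def generate_anchors_sketch(anchor_reference, stride, feature_shape):
    """Shift each reference anchor to every feature-map position."""
    feat_h, feat_w = feature_shape
    shift_x, shift_y = np.meshgrid(np.arange(feat_w) * stride,
                                   np.arange(feat_h) * stride)
    # One (x1, y1, x2, y2) shift per feature-map cell.
    shifts = np.stack([shift_x.ravel(), shift_y.ravel(),
                       shift_x.ravel(), shift_y.ravel()], axis=1)
    # Broadcast: (num_cells, 1, 4) + (1, num_anchors, 4).
    anchors = shifts[:, None, :] + anchor_reference[None, :, :]
    return anchors.reshape(-1, 4)

ref = np.array([[-8.0, -8.0, 8.0, 8.0]])  # one 16x16 reference anchor
print(generate_anchors_sketch(ref, 16, (2, 2)).shape)  # (4, 4)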
Example #4
    def testBasic(self):
        """Tests shapes are consistent with anchor generation.
        """
        model = RPN(self.num_anchors, self.config, debug=True)
        # Shape of the pretrained output (plus the batch dimension).
        pretrained_output_shape = (1, 32, 32, 512)
        pretrained_output = tf.placeholder(tf.float32,
                                           shape=pretrained_output_shape)

        # Estimate image shape from the pretrained output and the anchor stride
        image_shape_val = (
            int(pretrained_output_shape[1] * self.stride),
            int(pretrained_output_shape[2] * self.stride),
        )

        # Use 4 ground truth boxes.
        gt_boxes_shape = (4, 4)
        gt_boxes = tf.placeholder(tf.float32, shape=gt_boxes_shape)
        image_shape_shape = (2, )
        image_shape = tf.placeholder(tf.float32, shape=image_shape_shape)
        # Total anchors depends on the pretrained output shape and the total
        # number of anchors per point.
        total_anchors = (pretrained_output_shape[1] *
                         pretrained_output_shape[2] * self.num_anchors)
        all_anchors_shape = (total_anchors, 4)
        all_anchors = tf.placeholder(tf.float32, shape=all_anchors_shape)
        layers = model(pretrained_output,
                       image_shape,
                       all_anchors,
                       gt_boxes=gt_boxes)

        with self.test_session() as sess:
            # As in the case of a real session we need to initialize the
            # variables.
            sess.run(tf.global_variables_initializer())
            layers_inst = sess.run(
                layers,
                feed_dict={
                    # We don't really care about the value of the pretrained
                    # output, only that it has the correct shape.
                    pretrained_output:
                    np.random.rand(*pretrained_output_shape),
                    # Generate random but valid ground truth boxes.
                    gt_boxes:
                    generate_gt_boxes(gt_boxes_shape[0], image_shape_val),
                    # Generate anchors from a reference and the shape of the
                    # pretrained_output.
                    all_anchors:
                    generate_anchors(
                        generate_anchors_reference(self.base_size, self.ratios,
                                                   self.scales), 16,
                        pretrained_output_shape[1:3]),
                    image_shape:
                    image_shape_val,
                })

        # Class score generates 2 values per anchor.
        rpn_cls_score_shape = layers_inst['rpn_cls_score'].shape
        rpn_cls_score_true_shape = (total_anchors, 2)
        self.assertEqual(rpn_cls_score_shape, rpn_cls_score_true_shape)

        # Probs have the same shape as cls scores.
        rpn_cls_prob_shape = layers_inst['rpn_cls_prob'].shape
        self.assertEqual(rpn_cls_prob_shape, rpn_cls_score_true_shape)

        # We check softmax with the sum of the output.
        rpn_cls_prob_sum = layers_inst['rpn_cls_prob'].sum(axis=1)
        self.assertAllClose(rpn_cls_prob_sum, np.ones(total_anchors))

        # Proposals and scores are related to the output of the NMS with
        # limits.
        total_proposals = layers_inst['proposals'].shape[0]
        total_scores = layers_inst['scores'].shape[0]

        # Check we don't get more than top_n proposals.
        self.assertGreaterEqual(self.config.proposals.post_nms_top_n,
                                total_proposals)

        # Check we get a score for each proposal.
        self.assertEqual(total_proposals, total_scores)

        # Check that we get a regression for each anchor.
        self.assertEqual(layers_inst['rpn_bbox_pred'].shape,
                         (total_anchors, 4))

        # Check that we get a target for each regression for each anchor.
        self.assertEqual(layers_inst['rpn_bbox_target'].shape,
                         (total_anchors, 4))

        # Check that we get a target class for each anchor.
        self.assertEqual(layers_inst['rpn_cls_target'].shape,
                         (total_anchors, ))

        # Check that targets are composed of [-1, 0, 1] only.
        rpn_cls_target = layers_inst['rpn_cls_target']
        self.assertEqual(tuple(np.sort(np.unique(rpn_cls_target))),
                         (-1, 0., 1.))

        batch_cls_target = rpn_cls_target[(rpn_cls_target == 0.) |
                                          (rpn_cls_target == 1.)]

        # Check that the non-negative target classes are exactly the size
        # of the minibatch.
        self.assertEqual(batch_cls_target.shape,
                         (self.config.target.minibatch_size, ))

        # Check that we get up to foreground_fraction of positive anchors.
        self.assertLessEqual(
            batch_cls_target[batch_cls_target == 1.].shape[0] /
            batch_cls_target.shape[0], self.config.target.foreground_fraction)
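
`generate_gt_boxes` above is a test helper; a plausible minimal sketch of what it does (an assumption about the real helper, which may also clip or sort coordinates):

import numpy as np

def generate_gt_boxes_sketch(num_boxes, image_shape, rng=np.random):
    """Random (x1, y1, x2, y2) boxes that fit inside the image."""
    height, width = image_shape
    x1 = rng.uniform(0, width - 1, size=num_boxes)
    y1 = rng.uniform(0, height - 1, size=num_boxes)
    # Ensure x2 > x1 and y2 > y1 so every box has positive area.
    x2 = rng.uniform(x1 + 1, width, size=num_boxes)
    y2 = rng.uniform(y1 + 1, height, size=num_boxes)
    return np.stack([x1, y1, x2, y2], axis=1)

print(generate_gt_boxes_sketch(4, (512, 512)).shape)  # (4, 4)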
Example #5
    def testTypes(self):
        """Tests that return types are the expected ones.
        """
        # We repeat testBasic's setup.
        model = RPN(self.num_anchors, self.config, debug=True)
        pretrained_output_shape = (1, 32, 32, 512)
        pretrained_output = tf.placeholder(tf.float32,
                                           shape=pretrained_output_shape)

        image_shape_val = (
            int(pretrained_output_shape[1] * self.stride),
            int(pretrained_output_shape[2] * self.stride),
        )

        gt_boxes_shape = (4, 4)
        gt_boxes = tf.placeholder(tf.float32, shape=gt_boxes_shape)
        image_shape_shape = (2, )
        image_shape = tf.placeholder(tf.float32, shape=image_shape_shape)

        total_anchors = (pretrained_output_shape[1] *
                         pretrained_output_shape[2] * self.num_anchors)
        all_anchors_shape = (total_anchors, 4)
        all_anchors = tf.placeholder(tf.float32, shape=all_anchors_shape)
        layers = model(pretrained_output,
                       image_shape,
                       all_anchors,
                       gt_boxes=gt_boxes)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            layers_inst = sess.run(
                layers,
                feed_dict={
                    pretrained_output:
                    np.random.rand(*pretrained_output_shape),
                    gt_boxes:
                    generate_gt_boxes(gt_boxes_shape[0], image_shape_val),
                    all_anchors:
                    generate_anchors(
                        generate_anchors_reference(self.base_size, self.ratios,
                                                   self.scales), 16,
                        pretrained_output_shape[1:3]),
                    image_shape:
                    image_shape_val,
                })

        # Assertions
        proposals = layers_inst['proposals']
        scores = layers_inst['scores']
        rpn_cls_prob = layers_inst['rpn_cls_prob']
        rpn_cls_score = layers_inst['rpn_cls_score']
        rpn_bbox_pred = layers_inst['rpn_bbox_pred']
        rpn_cls_target = layers_inst['rpn_cls_target']
        rpn_bbox_target = layers_inst['rpn_bbox_target']
        # Everything should have dtype=tf.float32
        self.assertAllEqual(
            # We have 7 values we want to compare to tf.float32.
            [tf.float32] * 7,
            [
                proposals.dtype,
                scores.dtype,
                rpn_cls_prob.dtype,
                rpn_cls_score.dtype,
                rpn_bbox_pred.dtype,
                rpn_cls_target.dtype,
                rpn_bbox_target.dtype,
            ])
Example #6
    def _build(self, image, gt_boxes=None, is_training=False):
        """
        Returns bounding boxes and classification probabilities.

        Args:
            image: A tensor with the image.
                Its shape should be `(height, width, 3)`.
            gt_boxes: A tensor with all the ground truth boxes of that image.
                Its shape should be `(num_gt_boxes, 5)`
                Where for each gt box we have (x1, y1, x2, y2, label),
                in that order.
            is_training: A boolean indicating whether the model is being
                trained.

        Returns:
            classification_prob: A tensor with the softmax probability for
                each of the bounding boxes found in the image.
                Its shape should be: (num_bboxes, num_categories + 1)
            classification_bbox: A tensor with the bounding boxes found.
                Its shape should be: (num_bboxes, 4). For each of the bboxes
                we have (x1, y1, x2, y2)
        """

        #### Use variable_scope to split BodyDetector and PartDetector.

        if gt_boxes is not None:
            gt_boxes = tf.cast(gt_boxes, tf.float32)
        # A Tensor with the feature map for the image,
        # its shape should be `(feature_height, feature_width, 512)`.
        # The shape depends on the pretrained network in use.

        # Set rank and last dimension before using base network
        # TODO: Why does it lose information when using a queue?
        image.set_shape((None, None, 3))

        conv_feature_map = self.base_network(
            tf.expand_dims(image, 0), is_training=is_training
        )

        C4 = conv_feature_map
        with tf.variable_scope("C5"):
            C5 = self.iter_unify_layer(C4, is_training=is_training)
            #C5 = self.unify_layer(C4, is_training=is_training)

        with tf.variable_scope("Head_body_part"):
            Head_body_part = self.iter_unify_layer(C5, is_training=is_training)
            #Head_body_part = self.unify_layer(C5, is_training=is_training)

        with tf.variable_scope("Head_hf_part"):
            Head_hf_part = self.iter_unify_layer(C5, is_training=is_training)
            #Head_hf_part = self.unify_layer(C5, is_training=is_training)

        with tf.variable_scope("Head_hf_part_conv"):
            Head_hf_part_conv = self.iter_unify_layer(
                Head_hf_part, is_training=is_training
            )

        # The RPN submodule which generates proposals of objects.
        self._rpn = RPN(
            self._num_anchors, self._config.model.rpn,
            debug=self._debug, seed=self._seed
        )

        if self._with_rcnn:
            # The RCNN submodule, which classifies the RPN's proposals
            # as background or a specific class.
            self._rcnn = RCNN(
                self._num_classes, self._config.model.rcnn,
                debug=self._debug, seed=self._seed,
                name="__rcnn__1"
            )

        image_shape = tf.shape(image)[0:2]

        variable_summaries(
            conv_feature_map, 'conv_feature_map', 'reduced'
        )

        # Generate anchors for the image based on the anchor reference.
        all_anchors_1 = self._generate_anchors(tf.shape(conv_feature_map))

        rpn_1_prediction = self._rpn(
            conv_feature_map, image_shape, all_anchors_1,
            gt_boxes=gt_boxes, is_training=is_training
        )

        prediction_1_dict = {
            'rpn_prediction': rpn_1_prediction,
        }

        if self._debug:
            prediction_1_dict['image'] = image
            prediction_1_dict['image_shape'] = image_shape
            prediction_1_dict['all_anchors'] = all_anchors_1
            prediction_1_dict['anchor_reference'] = tf.convert_to_tensor(
                self._anchor_reference
            )
            if gt_boxes is not None:
                prediction_1_dict['gt_boxes'] = gt_boxes
            prediction_1_dict['conv_feature_map'] = conv_feature_map

        if self._with_rcnn:
            proposals = tf.stop_gradient(rpn_1_prediction['proposals'])

            rpn_1_proposals = proposals

            classification_pred = self._rcnn(
                Head_body_part, proposals,
                image_shape, self.base_network,
                gt_boxes=gt_boxes, is_training=is_training
            )

            #### retrieve req from classification_pred
            without_filter_dict = classification_pred["without_filter_dict"]

            objects_1_all = without_filter_dict["objects"]
            labels_1_all = without_filter_dict["proposal_label"]
            probs_1_all = without_filter_dict["proposal_label_prob"]

            objects_1 = classification_pred["objects"]
            labels_1 = classification_pred["labels"]
            probs_1 = classification_pred["probs"]

            prediction_1_dict['objects'] = objects_1
            prediction_1_dict['labels'] = labels_1
            prediction_1_dict['probs'] = probs_1

            # Rank detections so main-part boxes come first (the indicator
            # adds 1 to their score), then by descending probability.
            main_part_indicator = tf.cast(
                1 - tf.sign(tf.abs(labels_1_all - self._main_part_label)),
                tf.float32)
            top_indices = tf.nn.top_k(
                main_part_indicator + probs_1_all,
                k=tf.shape(labels_1_all)[0]).indices

            objects_1_sorted = tf.gather(objects_1_all, top_indices)
            # Keep at most 7 of the top-ranked boxes.
            filter_num = tf.minimum(tf.shape(objects_1_sorted)[0], 7)

            objects_1_filtered = tf.slice(
                objects_1_sorted, begin=[0, 0], size=[filter_num, 4])
            #### Expand with the main part label: [?, 4] -> [?, 5].
            main_part_labels = tf.fill(
                [tf.shape(objects_1_filtered)[0], 1],
                tf.convert_to_tensor(self._main_part_label, tf.float32))
            objects_1_filtered = tf.concat(
                [objects_1_filtered, main_part_labels], axis=-1)

            prediction_1_dict['classification_prediction'] = classification_pred

            if gt_boxes is not None:
                body_feature_ground_truth = self.generate_PartDetector_features(
                    input_image=image, input_feature=Head_hf_part,
                    gt_boxes=gt_boxes, only_main_part_boxes=False)
                # Swap the main-part ground truth boxes for the filtered
                # predictions before extracting features.
                non_main_part_gt = tf.gather(
                    gt_boxes,
                    tf.reshape(tf.where(tf.not_equal(
                        gt_boxes[:, -1], self._main_part_label)), [-1]))
                body_feature_pred = self.generate_PartDetector_features(
                    input_image=image, input_feature=Head_hf_part,
                    gt_boxes=tf.concat(
                        [non_main_part_gt, objects_1_filtered], axis=0),
                    only_main_part_boxes=False)
            else:
                body_feature_ground_truth = None
                body_feature_pred = self.generate_PartDetector_features(
                    input_image=image, input_feature=Head_hf_part, gt_boxes=objects_1_filtered,
                    only_main_part_boxes=True
                )

            #### Use as a fake placeholder.
            if gt_boxes is not None:
                body_feature_pred = tf.reshape(
                    body_feature_pred,
                    [-1, tf.shape(body_feature_ground_truth)[-1]])
            else:
                body_feature_pred = tf.reshape(body_feature_pred, [-1, 147461])

            #### Unstack along the first dim and "map reduce" each feature on
            #### the modified Faster R-CNN; ground truth labels are remapped
            #### in the "decoder" of each single feature.
            fixed_sliced_size, PartDetector_feature_stacked = \
                self.padding_and_slice_PartDetector_features(
                    body_pred_feature=body_feature_pred,
                    body_ground_truth_feature=body_feature_ground_truth)
            PartDetector_feature_stacked = tf.slice(
                PartDetector_feature_stacked, begin=[0, 0],
                size=[fixed_sliced_size, -1])

            if gt_boxes is not None:
                PartDetector_feature_stacked = tf.gather(
                    PartDetector_feature_stacked,
                    tf.random_shuffle(tf.range(fixed_sliced_size)))
                PartDetector_feature_stacked = tf.reshape(
                    PartDetector_feature_stacked, [fixed_sliced_size, -1])
                PartDetector_feature_unstacked = [
                    PartDetector_feature_stacked[0, ...]]
            else:
                PartDetector_feature_unstacked = tf.unstack(PartDetector_feature_stacked, axis=0)
            partdetector_dict_list = []

            for single_partdetector_feature in PartDetector_feature_unstacked:
                if gt_boxes is not None:
                    main_part_ori_bbox, cropped_feature, cropped_bboxes = \
                        self.decode_single_unstacked_feature(
                            input_feature=single_partdetector_feature,
                            only_main_part_boxes=False)
                else:
                    main_part_ori_bbox, cropped_feature = \
                        self.decode_single_unstacked_feature(
                            input_feature=single_partdetector_feature,
                            only_main_part_boxes=True)
                    cropped_bboxes = None

                x1, y1, x2, y2, _ = tf.split(main_part_ori_bbox, 5)
                x1, y1, x2, y2 = map(
                    lambda t: tf.cast(tf.reshape(t, []), tf.int32),
                    [x1, y1, x2, y2])

                cropped_image = tf.image.crop_to_bounding_box(
                    image=image, offset_height=y1, offset_width=x1,
                    target_height=y2 - y1 + 1, target_width=x2 - x1 + 1)
                cropped_feature = tf.expand_dims(cropped_feature, 0)

                input_feature = Head_hf_part_conv
                image_h, image_w = tf.split(
                    tf.shape(image)[0:2], num_or_size_splits=2)
                feature_h, feature_w = tf.split(
                    tf.shape(input_feature)[1:3], num_or_size_splits=2)

                def _scale(coord, image_dim, feature_dim):
                    # Project an image coordinate onto the feature map.
                    return tf.reshape(tf.cast(
                        tf.cast(coord, tf.float32) /
                        tf.cast(image_dim, tf.float32) *
                        tf.cast(feature_dim, tf.float32), tf.int32), [])

                # Crop the corresponding window out of the feature map.
                Head_hf_part_conv = tf.slice(
                    input_feature,
                    begin=[0, _scale(y1, image_h, feature_h),
                           _scale(x1, image_w, feature_w), 0],
                    size=[-1, _scale(y2 - y1, image_h, feature_h),
                          _scale(x2 - x1, image_w, feature_w), 256])

                #### Alternatively, Head_hf_part_conv could be left uncropped
                #### to test efficiency.
                partdetector_dict = self.partdetetor(
                    conv_feature_map=cropped_feature,
                    Head_hf_part_conv=Head_hf_part_conv,
                    image=cropped_image, gt_boxes=cropped_bboxes,
                    is_training=is_training)

                partdetector_dict["main_info"] = {
                    "image": image,
                    "main_part_ori_bbox": main_part_ori_bbox
                }

                partdetector_dict_list.append(partdetector_dict)

            return [prediction_1_dict] + partdetector_dict_list
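
The ranking step above, which adds an indicator to the probabilities before `top_k` so that main-part boxes sort first, in a minimal NumPy sketch with made-up values and `main_part_label` assumed to be 2:

import numpy as np

labels = np.array([2.0, 0.0, 2.0, 1.0])  # assumed main_part_label == 2
probs = np.array([0.3, 0.9, 0.8, 0.6])
indicator = 1 - np.sign(np.abs(labels - 2.0))  # 1 where label == 2, else 0
order = np.argsort(-(indicator + probs))
print(order)  # [2 0 1 3]: main-part boxes first, then by probability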
Example #7
    def _build(self, image, gt_boxes=None, is_training=True):
        """
        Returns bounding boxes and classification probabilities.

        Args:
            image: A tensor with the image.
                Its shape should be `(1, height, width, 3)`.
            gt_boxes: A tensor with all the ground truth boxes of that image.
                Its shape should be `(num_gt_boxes, 5)`
                Where for each gt box we have (x1, y1, x2, y2, label),
                in that order.
            is_training: A boolean indicating whether the model is being
                trained.

        Returns:
            classification_prob: A tensor with the softmax probability for
                each of the bounding boxes found in the image.
                Its shape should be: (num_bboxes, num_categories + 1)
            classification_bbox: A tensor with the bounding boxes found.
                Its shape should be: (num_bboxes, 4). For each of the bboxes
                we have (x1, y1, x2, y2)
        """
        if gt_boxes is not None:
            gt_boxes = tf.cast(gt_boxes, tf.float32)
        # A Tensor with the feature map for the image,
        # its shape should be `(feature_height, feature_width, 512)`.
        # The shape depends on the pretrained network in use.
        conv_feature_map = self.base_network(image, is_training=is_training)

        # The RPN submodule which generates proposals of objects.
        self._rpn = RPN(self._num_anchors,
                        self._config.rpn,
                        debug=self._debug,
                        seed=self._seed)
        if self._with_rcnn:
            # The RCNN submodule, which classifies the RPN's proposals
            # as background or a specific class.
            self._rcnn = RCNN(self._num_classes,
                              self._config.rcnn,
                              debug=self._debug,
                              seed=self._seed)

        image_shape = tf.shape(image)[1:3]

        variable_summaries(conv_feature_map, 'conv_feature_map', ['rpn'])

        # Generate anchors for the image based on the anchor reference.
        all_anchors = self._generate_anchors(tf.shape(conv_feature_map))
        rpn_prediction = self._rpn(conv_feature_map,
                                   image_shape,
                                   all_anchors,
                                   gt_boxes=gt_boxes)

        prediction_dict = {
            'rpn_prediction': rpn_prediction,
        }

        if self._debug:
            prediction_dict['image'] = image
            prediction_dict['image_shape'] = image_shape
            prediction_dict['all_anchors'] = all_anchors
            prediction_dict['anchor_reference'] = tf.convert_to_tensor(
                self._anchor_reference)
            prediction_dict['gt_boxes'] = gt_boxes
            prediction_dict['conv_feature_map'] = conv_feature_map

        if self._with_rcnn:
            classification_pred = self._rcnn(conv_feature_map,
                                             rpn_prediction['proposals'],
                                             image_shape,
                                             gt_boxes=gt_boxes,
                                             is_training=is_training)

            prediction_dict['classification_prediction'] = classification_pred

        return prediction_dict
Example #8
        def valid_conclusion(gt_boxes):
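            # Note: `image`, `conv_feature_map`, `prediction_dict`,
            # `Head_hf_part_conv` and `is_training` are captured from the
            # enclosing scope.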
            if gt_boxes is not None:
                gt_boxes = tf.cast(gt_boxes, tf.float32)
            # A Tensor with the feature map for the image,
            # its shape should be `(feature_height, feature_width, 512)`.
            # The shape depends on the pretrained network in use.

            # Set rank and last dimension before using base network
            # TODO: Why does it lose information when using a queue?
            image.set_shape((None, None, 3))

            # The RPN submodule which generates proposals of objects.
            self._rpn = RPN(
                self._num_anchors, self._config.model.rpn,
                debug=self._debug, seed=self._seed
            )
            if self._with_rcnn:
                # The RCNN submodule, which classifies the RPN's proposals
                # as background or a specific class.
                self._rcnn = RCNN(
                    self._num_classes, self._config.model.rcnn,
                    debug=self._debug, seed=self._seed
                )

            image_shape = tf.shape(image)[0:2]

            variable_summaries(
                conv_feature_map, 'conv_feature_map', 'reduced'
            )

            # Generate anchors for the image based on the anchor reference.
            all_anchors = self._generate_anchors(tf.shape(conv_feature_map))


            rpn_prediction = self._rpn(
                conv_feature_map, image_shape, all_anchors,
                gt_boxes=gt_boxes, is_training=is_training
            )

            prediction_dict["debug"] = (image, gt_boxes)

            prediction_dict["rpn_prediction"] = rpn_prediction

            if self._debug:
                prediction_dict['image'] = image
                prediction_dict['image_shape'] = image_shape
                prediction_dict['all_anchors'] = all_anchors
                prediction_dict['anchor_reference'] = tf.convert_to_tensor(
                    self._anchor_reference
                )
                if gt_boxes is not None:
                    prediction_dict['gt_boxes'] = gt_boxes
                prediction_dict['conv_feature_map'] = conv_feature_map

            if self._with_rcnn:
                proposals = tf.stop_gradient(rpn_prediction['proposals'])

                classification_pred = self._rcnn(
                    Head_hf_part_conv, proposals,
                    image_shape, self.base_network,
                    gt_boxes=gt_boxes, is_training=is_training
                )

                prediction_dict['classification_prediction'] = classification_pred

            return prediction_dict