Example #1
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (H, W, C) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            results (dict): inference results; empty during training unless the
                summary level is DEBUG.
            losses (dict[str, Tensor]): mapping from a named loss to a tensor
                storing the loss; empty outside of training.
        """
        batched_inputs = self.preprocess_image(batched_inputs)

        features = self.backbone(batched_inputs)
        if len(self.in_features) == 0:
            print(
                f"Error no input features for retinanet, use all features {features.keys()}"
            )
            features = list(features.values())
        else:
            features = [features[f] for f in self.in_features]
        pred_maps = self.head(features)
        gt_boxes = batched_inputs.get(GT_BOXES, None)
        gt_length = batched_inputs.get(GT_LENGTH, None)
        gt_labels = batched_inputs.get(GT_LABELS, None)
        gt_keypoints = batched_inputs.get(GT_KEYPOINTS, None)

        outputs = build_outputs(
            name=self.cfg.MODEL.KEYPOINTS.OUTPUTS,
            cfg=self.cfg.MODEL.KEYPOINTS,
            parent=self,
            pred_maps=pred_maps,
            gt_boxes=gt_boxes,
            gt_labels=gt_labels,
            gt_length=gt_length,
            gt_keypoints=gt_keypoints,
            max_detections_per_image=self.cfg.TEST.DETECTIONS_PER_IMAGE,
        )
        outputs.batched_inputs = batched_inputs

        if self.is_training:
            if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
                results = outputs.inference(inputs=batched_inputs,
                                            pred_maps=pred_maps)
            else:
                results = {}

            return results, outputs.losses()
        else:
            results = outputs.inference(inputs=batched_inputs,
                                        pred_maps=pred_maps)
            return results, {}
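
The implementation of build_outputs is not shown in these examples; judging from the call sites, it looks up an outputs class by the name coming from cfg.MODEL.<HEAD>.OUTPUTS and forwards the remaining keyword arguments to it, returning an object that exposes losses() and inference(). A minimal registry-style sketch of that pattern follows; every name in it is illustrative, not the framework's actual API.

OUTPUTS_REGISTRY = {}

def register_outputs(name):
    """Register an outputs class under a string name (hypothetical helper)."""
    def deco(cls):
        OUTPUTS_REGISTRY[name] = cls
        return cls
    return deco

def build_outputs(name, **kwargs):
    # Dispatch on the configured name and pass cfg, parent, predictions and
    # ground truth through unchanged.
    return OUTPUTS_REGISTRY[name](**kwargs)

@register_outputs("demo_outputs")
class DemoOutputs:
    def __init__(self, cfg=None, parent=None, **tensors):
        self.cfg = cfg
        self.parent = parent
        self.tensors = tensors

    def losses(self):
        # A real implementation computes per-task losses from self.tensors.
        return {}

    def inference(self, inputs=None, **pred_tensors):
        # A real implementation decodes predictions into detection results.
        return {}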
Example #2
    def forward(self, batched_inputs):
        net_outputs = self.snpe.tf_forward(batched_inputs[IMAGE])

        # The converted SNPE graph returns positional tensors; give them the
        # names the outputs module expects.
        head_outputs = {}
        head_outputs['offset'] = net_outputs[0]
        head_outputs['heatmaps_ct'] = net_outputs[1]
        head_outputs['hw'] = net_outputs[2]
        head_outputs['id_embedding'] = net_outputs[3]
        head_outputs = [head_outputs]

        gt_boxes = batched_inputs.get(GT_BOXES, None)
        gt_length = batched_inputs.get(GT_LENGTH, None)
        gt_labels = batched_inputs.get(GT_LABELS, None)

        outputs = build_outputs(
            name=self.cfg.MODEL.SNPE.OUTPUTS,
            cfg=self.cfg.MODEL.SNPE,
            parent=self,
            box2box_transform=self.box2box_transform,
            head_outputs=head_outputs,
            gt_boxes=gt_boxes,
            gt_labels=gt_labels,
            gt_length=gt_length,
            max_detections_per_image=self.cfg.TEST.DETECTIONS_PER_IMAGE,
        )

        if self.is_training:
            if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
                results = outputs.inference(inputs=batched_inputs,
                                            head_outputs=head_outputs)
            else:
                results = {}

            return results, outputs.losses()
        else:
            results = outputs.inference(inputs=batched_inputs,
                                        head_outputs=head_outputs)
            return results, {}
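
The dict built at the top of this forward() hard-codes which positional SNPE output feeds which head. A small hypothetical helper that keeps that ordering assumption in one place (the order is taken from the assignments above):

SNPE_OUTPUT_NAMES = ('offset', 'heatmaps_ct', 'hw', 'id_embedding')

def name_snpe_outputs(outputs, names=SNPE_OUTPUT_NAMES):
    # Fail loudly if the exported SNPE graph ever changes its output count.
    if len(outputs) != len(names):
        raise ValueError(f"expected {len(names)} SNPE outputs, got {len(outputs)}")
    return dict(zip(names, outputs))

With such a helper, the block above would reduce to head_outputs = [name_snpe_outputs(net_outputs)].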
Example #3
    def forward(self, inputs, features):

        features = [features[f] for f in self.in_features]

        gt_boxes = inputs.get(GT_BOXES, None)
        gt_length = inputs.get(GT_LENGTH, None)

        pred_objectness_logits, pred_anchor_deltas = self.rpn_head(
            inputs, features)
        anchors = self.rpn_head.anchor_generator(inputs, features)
        self.anchors_num_per_level = [
            wmlt.combined_static_and_dynamic_shape(x)[0] for x in anchors
        ]
        outputs = build_outputs(self.cfg.MODEL.RPN.OUTPUTS,
                                self.box2box_transform,
                                self.anchor_matcher,
                                self.batch_size_per_image,
                                self.positive_fraction,
                                pred_objectness_logits,
                                pred_anchor_deltas,
                                anchors,
                                gt_boxes,
                                gt_length=gt_length)
        if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
            outputs.inputs = inputs

        if self.is_training:
            losses = {
                k: v * self.loss_weight
                for k, v in outputs.losses().items()
            }
            rpn_threshold = 0.0
        else:
            rpn_threshold = self.cfg.MODEL.PROPOSAL_GENERATOR.SCORE_THRESH_TEST
            losses = {}

        # Find the top proposals by applying NMS and removing boxes that
        # are too small. The proposals are treated as fixed for approximate
        # joint training with roi heads. This approach ignores the derivative
        # w.r.t. the proposal boxes’ coordinates that are also network
        # responses, so is approximate.
        pre_nms_topk_max_per_layer = self.cfg.MODEL.RPN.PRE_NMS_TOPK_MAX_PER_LAYER
        proposals, logits = find_top_rpn_proposals(
            outputs.predict_proposals(),
            outputs.predict_objectness_logits(),
            self.nms_thresh,
            self.pre_nms_topk[self.is_training],
            self.post_nms_topk[self.is_training],
            self.anchors_num_per_level,
            score_threshold=rpn_threshold,
            is_training=self.is_training,
            pre_nms_topk_max_per_layer=pre_nms_topk_max_per_layer)
        if self.cfg.MODEL.RPN.SORT_RESULTS:
            with tf.name_scope("sort_rpn_results"):

                def fn(bboxes, keys):
                    # Sort this image's proposals by descending objectness score.
                    N = wmlt.combined_static_and_dynamic_shape(keys)
                    new_keys, indices = tf.nn.top_k(keys, k=N[0])
                    bboxes = tf.gather(bboxes, indices)
                    return [bboxes, new_keys]

                proposals, logits = tf.map_fn(lambda x: fn(x[0], x[1]),
                                              elems=[proposals, logits],
                                              back_prop=False)

        outdata = {PD_BOXES: proposals, PD_PROBABILITY: tf.nn.sigmoid(logits)}
        wsummary.detection_image_summary(images=inputs[IMAGE],
                                         boxes=outdata[PD_BOXES],
                                         name="rpn/proposals")

        return outdata, losses
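
The sort_rpn_results branch above sorts each image's proposals by objectness with tf.nn.top_k inside tf.map_fn. A self-contained TensorFlow sketch of the same per-image sort, without the wmlt helpers, assuming proposals of shape [batch, N, 4] and logits of shape [batch, N]:

import tensorflow as tf

def sort_proposals_by_score(boxes, scores):
    # boxes: [B, N, 4], scores: [B, N]; sort each image's boxes by descending score.
    k = tf.shape(scores)[1]
    sorted_scores, order = tf.nn.top_k(scores, k=k)       # [B, N]
    sorted_boxes = tf.gather(boxes, order, batch_dims=1)  # [B, N, 4]
    return sorted_boxes, sorted_scores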
Example #4
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (H, W, C) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            results (dict): inference results; empty during training unless the
                summary level is DEBUG.
            losses (dict[str, Tensor]): mapping from a named loss to a tensor
                storing the loss; empty outside of training.
        """
        batched_inputs = self.preprocess_image(batched_inputs)

        features = self.backbone(batched_inputs)
        features = [features[f] for f in self.in_features]
        head_outputs = self.head(features)
        anchors = self.anchor_generator(batched_inputs, features)
        gt_boxes = batched_inputs[GT_BOXES]
        gt_length = batched_inputs[GT_LENGTH]
        gt_labels = batched_inputs[GT_LABELS]

        outputs = build_outputs(
            name=self.cfg.MODEL.YOLACT.OUTPUTS,
            cfg=self.cfg.MODEL.YOLACT,
            parent=self,
            box2box_transform=self.box2box_transform,
            anchor_matcher=self.anchor_matcher,
            pred_logits=head_outputs[LOGITS],
            pred_anchor_deltas=head_outputs[BOXES_REGS],
            anchors=anchors,
            gt_boxes=gt_boxes,
            gt_labels=gt_labels,
            gt_length=gt_length,
            max_detections_per_image=self.cfg.TEST.DETECTIONS_PER_IMAGE,
            head_outputs=head_outputs,
            batched_inputs=batched_inputs,
            coefficient_nr=self.cfg.MODEL.YOLACT.PROTONET_NR,
        )

        if self.is_training:
            if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
                results = outputs.inference(inputs=batched_inputs,
                                            box_cls=head_outputs[LOGITS],
                                            box_delta=head_outputs[BOXES_REGS],
                                            anchors=anchors)
            else:
                results = {}

            return results, outputs.losses()
        else:
            results = outputs.inference(inputs=batched_inputs,
                                        box_cls=head_outputs[LOGITS],
                                        box_delta=head_outputs[BOXES_REGS],
                                        anchors=anchors)
            return results, {}
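
All of these forward() methods follow the same (results, losses) contract: results carries inference output (possibly empty during training), losses carries named loss tensors (empty outside training). A rough sketch of how a caller might consume that pair in a training step; the surrounding optimizer and session wiring is assumed, not shown in the examples.

import tensorflow as tf

def training_step(model, batched_inputs):
    results, losses = model.forward(batched_inputs)
    # Sum the named losses into one scalar for the optimizer.
    total_loss = tf.add_n(list(losses.values())) if losses else tf.constant(0.0)
    return results, total_loss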
Example #5
    def forward(self, batched_inputs, features):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (H, W, C) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            outdata (dict): proposal boxes (PD_BOXES) and their probabilities
                (PD_PROBABILITY).
            losses (dict[str, Tensor]): proposal-generator losses, each key
                prefixed with "pg_"; empty outside of training.
        """
        if len(self.in_features) == 0:
            print(
                f"Error no input features for retinanet, use all features {features.keys()}"
            )
            features = list(features.values())
        else:
            features = [features[f] for f in self.in_features]
        pred_logits, pred_regression, pred_center_ness = self.head(features)
        gt_boxes = batched_inputs[GT_BOXES]
        gt_length = batched_inputs[GT_LENGTH]
        gt_labels = batched_inputs[GT_LABELS]

        outputs = build_outputs(
            name=self.cfg.MODEL.FCOSPG.OUTPUTS,
            cfg=self.cfg.MODEL.FCOSPG,
            parent=self,
            box2box_transform=self.box2box_transform,
            pred_logits=pred_logits,
            pred_regression=pred_regression,
            pred_center_ness=pred_center_ness,
            gt_boxes=gt_boxes,
            gt_labels=gt_labels,
            gt_length=gt_length,
            batched_inputs=batched_inputs,
            max_detections_per_image=self.cfg.TEST.DETECTIONS_PER_IMAGE,
        )

        results = outputs.inference(inputs=batched_inputs,
                                    box_cls=pred_logits,
                                    box_regression=pred_regression,
                                    center_ness=pred_center_ness)
        losses = {}
        if self.is_training:
            _losses = outputs.losses()
            for k, v in _losses.items():
                losses["pg_" + k] = v
        outdata = {
            PD_BOXES: results[RD_BOXES],
            PD_PROBABILITY: results[RD_PROBABILITY]
        }
        wsummary.detection_image_summary(images=batched_inputs[IMAGE],
                                         boxes=outdata[PD_BOXES],
                                         name="fcospg/proposals")

        return outdata, losses
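
The loop above namespaces the proposal generator's losses with a "pg_" prefix so they do not collide with the detection head's loss names. The same idea as a tiny hypothetical helper:

def prefix_losses(losses, prefix="pg_"):
    return {prefix + name: value for name, value in losses.items()}

which would let the training branch read losses = prefix_losses(outputs.losses()).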
Example #6
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (H, W, C) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            results (dict): inference results; empty during training unless the
                summary level is DEBUG.
            losses (dict[str, Tensor]): mapping from a named loss to a tensor
                storing the loss; empty outside of training.
        """
        batched_inputs = self.preprocess_image(batched_inputs)

        features = self.backbone(batched_inputs)
        if len(self.in_features) == 0:
            print(
                f"Error no input features for deeplab, use all features {features.keys()}"
            )
            features = list(features.values())
        else:
            features = [features[f] for f in self.in_features]
        pred_logits = self.head(features)
        gt_labels = batched_inputs.get(GT_SEMANTIC_LABELS, None)

        outputs = build_outputs(
            name=self.cfg.MODEL.DEEPLAB.OUTPUTS,
            cfg=self.cfg.MODEL.DEEPLAB,
            parent=self,
            pred_logits=pred_logits,
            labels=gt_labels,
        )
        outputs.batched_inputs = batched_inputs
        max_outputs = 3
        wsummary.batch_semantic_summary(batched_inputs[IMAGE],
                                        masks=gt_labels[..., 1:],
                                        max_outputs=max_outputs,
                                        name="gt")

        if self.is_training:
            if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
                results = outputs.inference(inputs=batched_inputs,
                                            logits=pred_logits)
                wsummary.batch_semantic_summary(batched_inputs[IMAGE],
                                                masks=results[RD_SEMANTIC][..., 1:],
                                                max_outputs=max_outputs,
                                                name="pred")
                wsummary.feature_map_summary(gt_labels,
                                             name="gt_semantic",
                                             max_outputs=10)
                wsummary.feature_map_summary(results[RD_SEMANTIC],
                                             name="pred_semantic",
                                             max_outputs=10)
            else:
                results = {}

            return results, outputs.losses()
        else:
            results = outputs.inference(inputs=batched_inputs,
                                        logits=pred_logits)
            wsummary.batch_semantic_summary(batched_inputs[IMAGE],
                                            masks=results[RD_SEMANTIC][..., 1:],
                                            max_outputs=max_outputs,
                                            name="pred")
            wsummary.feature_map_summary(gt_labels,
                                         name="gt_semantic",
                                         max_outputs=10)
            wsummary.feature_map_summary(results[RD_SEMANTIC],
                                         name="pred_semantic",
                                         max_outputs=10)
            return results, {}
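
The inference-plus-summary code in the two branches above is duplicated verbatim. A hypothetical refactor that pulls it into one helper, assuming the example's own wsummary, IMAGE and RD_SEMANTIC names are in scope:

def infer_and_summarize(outputs, batched_inputs, pred_logits, gt_labels, max_outputs=3):
    results = outputs.inference(inputs=batched_inputs, logits=pred_logits)
    wsummary.batch_semantic_summary(batched_inputs[IMAGE],
                                    masks=results[RD_SEMANTIC][..., 1:],
                                    max_outputs=max_outputs,
                                    name="pred")
    wsummary.feature_map_summary(gt_labels, name="gt_semantic", max_outputs=10)
    wsummary.feature_map_summary(results[RD_SEMANTIC], name="pred_semantic", max_outputs=10)
    return results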