Example #1
    def inference(self,
                  inputs,
                  box_cls,
                  box_regression,
                  center_ness,
                  nms=None,
                  pad=True):
        """
        Arguments:
            inputs: same as FCOS.forward's batched_inputs
            box_cls: list of Tensor, Tensor's shape is [B,H,W,A*num_classes]
            box_delta: list of Tensor, Tensor's shape is [B,H,W,A*4]
        Returns:
            results:
            RD_BOXES: [B,N,4]
            RD_LABELS: [B,N]
            RD_PROBABILITY:[ B,N]
            RD_LENGTH:[B]
        """
        assert len(box_cls[0].get_shape()) == 4, "error box cls dims"
        assert len(box_regression[0].get_shape()) == 4, "error box delta dims"

        B, _, _, _ = wmlt.combined_static_and_dynamic_shape(box_regression[0])
        fm_sizes = [tf.shape(x)[1:3] for x in box_regression]
        box_cls = [reshape_to_N_HWA_K(x, self.num_classes) for x in box_cls]
        box_regression = [reshape_to_N_HWA_K(x, 4) for x in box_regression]
        center_ness = [tf.reshape(x, [B, -1]) for x in center_ness]
        box_cls = tf.concat(box_cls, axis=1)
        box_regression = tf.concat(box_regression, axis=1)
        center_ness = tf.concat(center_ness, axis=1)

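        # Decode each image of the batch independently; inference_single_image is
        # expected to return (boxes, labels, scores, length), matching the dtype list below.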
        results = wmlt.static_or_dynamic_map_fn(
            lambda x: self.inference_single_image(
                x[0], x[1], x[2], fm_sizes, nms=nms, pad=pad),
            elems=[box_cls, box_regression, center_ness],
            dtype=[tf.float32, tf.int32, tf.float32, tf.int32],
            back_prop=False)
        outdata = {
            RD_BOXES: results[0],
            RD_LABELS: results[1],
            RD_PROBABILITY: results[2],
            RD_LENGTH: results[3]
        }
        if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
            wsummary.detection_image_summary(
                images=inputs[IMAGE],
                boxes=outdata[RD_BOXES],
                classes=outdata[RD_LABELS],
                lengths=outdata[RD_LENGTH],
                scores=outdata[RD_PROBABILITY],
                name="FCOSGIou_result",
                category_index=DataLoader.category_index)
        return outdata
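
A note on the helper used in several of these examples: reshape_to_N_HWA_K is assumed to flatten a per-level prediction map of shape [B,H,W,A*K] into [B,H*W*A,K] so that all feature levels can be concatenated along the anchor axis. A minimal sketch under that assumption (not necessarily the repository's exact code):

    import tensorflow as tf

    def reshape_to_N_HWA_K(tensor, K):
        # [B,H,W,A*K] -> [B,H*W*A,K]; anchors keep their (row, col, anchor) order
        B = tf.shape(tensor)[0]
        return tf.reshape(tensor, [B, -1, K])
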
Example #2
    def inference(self, inputs, box_cls, box_delta, anchors, output_fix_nr=0):
        """
        Arguments:
            inputs: same as RetinaNet.forward's batched_inputs
            box_cls: list of Tensor, Tensor's shape is [B,H,W,A*num_classes]
            box_delta: list of Tensor, Tensor's shape is [B,H,W,A*4]
            anchors: list of Tensor, Tensor's shape is [X,4] (X=H*W*A)
        Returns:
            results:
            RD_BOXES: [B,N,4]
            RD_LABELS: [B,N]
            RD_PROBABILITY: [B,N]
            RD_LENGTH: [B]
        """
        assert len(anchors[0].get_shape()) == 2, "error anchors dims"
        assert len(box_cls[0].get_shape()) == 4, "error box cls dims"
        assert len(box_delta[0].get_shape()) == 4, "error box delta dims"

        anchors_size = [tf.shape(x)[0] for x in anchors]
        anchors = tf.concat(anchors, axis=0)

        box_cls = [reshape_to_N_HWA_K(x, self.num_classes) for x in box_cls]
        box_delta = [reshape_to_N_HWA_K(x, 4) for x in box_delta]
        box_cls = tf.concat(box_cls, axis=1)
        box_delta = tf.concat(box_delta, axis=1)

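        # Per-image decoding over the batch; inference_single_image is expected to
        # return (boxes, labels, scores, indices, length), matching the dtype list
        # below and the results[...] indexing used to build outdata.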
        results = wmlt.static_or_dynamic_map_fn(
            lambda x: self.inference_single_image(x[0], x[1], anchors,
                                                  anchors_size, output_fix_nr),
            elems=[box_cls, box_delta],
            dtype=[tf.float32, tf.int32, tf.float32, tf.int32, tf.int32],
            back_prop=False)
        outdata = {
            RD_BOXES: results[0],
            RD_LABELS: results[1],
            RD_PROBABILITY: results[2],
            RD_LENGTH: results[4],
            RD_INDICES: results[3]
        }
        if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
            wsummary.detection_image_summary(
                images=inputs[IMAGE],
                boxes=outdata[RD_BOXES],
                classes=outdata[RD_LABELS],
                lengths=outdata[RD_LENGTH],
                scores=outdata[RD_PROBABILITY],
                name="RetinaNet_result",
                category_index=DataLoader.category_index)
        return outdata
Example #3
    def forward(self, inputs):
        """
        Args:
            Same as in :class:`GeneralizedRCNN.forward`

        Returns:
            outdata: dict from the proposal generator containing at least PD_BOXES
            proposal_losses: dict of proposal generator losses
        """
        inputs = self.preprocess_image(inputs)
        features = self.backbone(inputs)
        if isinstance(features,(list,tuple)):
            features = features[0]
        outdata,proposal_losses = self.proposal_generator(inputs, features)
        wsummary.detection_image_summary(images=inputs['image'],boxes=outdata[PD_BOXES],name="proposal_boxes")
        return outdata,proposal_losses
Example #4
    def _forward_box(self, inputs,features, proposals):
        head_outputs = []
        img_size = get_img_size_from_batched_inputs(inputs)
        for k in range(self.num_cascade_stages):
            if k > 0:
                # The output boxes of the previous stage are the input proposals of the next stage
                proposals_boxes = head_outputs[-1].predict_boxes_for_gt_classes()
                if self.is_training:
                    proposals = self._match_and_label_boxes(inputs,proposals_boxes,
                                                                stage=k)
                else:
                    proposals = {PD_BOXES:proposals_boxes}
            head_outputs.append(self._run_stage(features, proposals, k,img_size=img_size))
            if self.cfg.GLOBAL.SUMMARY_LEVEL<=SummaryLevel.DEBUG:
                results = head_outputs[-1].inference(
                    self.test_score_thresh,
                    self.test_nms_thresh,
                    self.test_detections_per_img)
                wsummary.detection_image_summary(images=inputs[IMAGE],
                                         boxes=results[RD_BOXES], classes=results[RD_LABELS],
                                         lengths=results[RD_LENGTH],
                                         name=f"RCNN_result{k}")

        if self.is_training:
            losses = {}
            for stage, output in enumerate(head_outputs):
                stage_losses = output.losses()
                losses.update({k + "_stage{}".format(stage): v for k, v in stage_losses.items()})
            return losses
        else:
            # Each is a list[Tensor] of length #image. Each tensor is Ri x (K+1)
            scores_per_stage = [h.predict_probs() for h in head_outputs]

            # Average the scores across heads
            scores = tf.stack(scores_per_stage,axis=-1)
            scores = tf.reduce_mean(scores,axis=-1,keepdims=False)
            # Use the boxes of the last head
            pred_instances = head_outputs[-1].inference(
                self.test_score_thresh,
                self.test_nms_thresh,
                self.test_detections_per_img,
                scores = scores
            )
            return pred_instances
Example #5
    def inference(self, inputs, box_cls, box_delta, anchors):
        """
        Arguments:
            box_cls, box_delta: Same as the output of :meth:`RetinaNetHead.forward`
            anchors (list[list[Boxes]]): a list of #images elements. Each is a
                list of #feature level Boxes. The Boxes contain anchors of this
                image on the specific feature level.

        Returns:
            results (List[Instances]): a list of #images elements.
        """
        assert len(anchors[0].get_shape()) == 2, "error anchors dims"
        anchors_size = [tf.shape(x)[0] for x in anchors]
        anchors = tf.concat(anchors, axis=0)

        box_cls = [
            reshape_to_N_HWA_K(x, self.num_classes + 1) for x in box_cls
        ]
        box_delta = [reshape_to_N_HWA_K(x, 4) for x in box_delta]
        box_cls = tf.concat(box_cls, axis=1)
        box_delta = tf.concat(box_delta, axis=1)

        results = wmlt.static_or_dynamic_map_fn(
            lambda x: self.inference_single_image(x[0], x[1], anchors,
                                                  anchors_size),
            elems=[box_cls, box_delta],
            dtype=(tf.float32, tf.int32, tf.float32, tf.int32),
            back_prop=False)
        outdata = {
            RD_BOXES: results[0],
            RD_LABELS: results[1],
            RD_PROBABILITY: results[2],
            RD_LENGTH: results[3]
        }
        if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
            wsummary.detection_image_summary(
                images=inputs[IMAGE],
                boxes=outdata[RD_BOXES],
                classes=outdata[RD_LABELS],
                scores=outdata[RD_PROBABILITY],
                lengths=outdata[RD_LENGTH],
                name="SSD_result",
                category_index=DataLoader.category_index)
        return outdata
Example #6
 def show_anchors(self, anchors, features, img_size=[512, 512]):
     with tf.device(":/cpu:0"):
         with tf.name_scope("show_anchors"):
             image = tf.ones(img_size)
             image = tf.expand_dims(image, axis=0)
             image = tf.expand_dims(image, axis=-1)
             image = tf.tile(image, [1, 1, 1, 3])
             for i in range(len(anchors)):
                 if not isinstance(self.aspect_ratios[i][0], Iterable):
                     num_cell_anchors = len(self.aspect_ratios[i]) * len(
                         self.sizes[i])
                 else:
                     num_cell_anchors = len(self.aspect_ratios[i][0]) * len(
                         self.sizes[i])
                 shape = wmlt.combined_static_and_dynamic_shape(features[i])
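                 # Visualize only the anchors of the (roughly) central cell of this
                 # feature level; offset indexes the first anchor of that cell.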
                 offset = ((shape[1] // 2) * shape[2] +
                           shape[2] // 2) * num_cell_anchors
                 boxes = anchors[i][offset:offset + num_cell_anchors]
                 boxes = tf.expand_dims(boxes, axis=0)
                 wsummary.detection_image_summary(images=image,
                                                  boxes=boxes,
                                                  name=f"level_{i}")
Example #7
    def inference(self,inputs,head_outputs):
        """
        Arguments:
            inputs: same as CenterNet.forward's batched_inputs
        Returns:
            results:
            RD_BOXES: [B,N,4]
            RD_LABELS: [B,N]
            RD_PROBABILITY: [B,N]
            RD_LENGTH: [B]
        """
        self.inputs = inputs
        all_bboxes = []
        all_scores = []
        all_clses = []
        all_length = []
        img_size = tf.shape(inputs[IMAGE])[1:3]
        assert len(head_outputs)==1,f"Error head outputs len {len(head_outputs)}"
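        # Single output level: decode boxes above SCORE_THRESH_TEST, then run
        # per-image NMS and keep at most max_detections_per_image detections.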
        nms = partial(odl.boxes_nms,threshold=self.nms_threshold)
        bboxes,clses, scores,length = self.get_box_in_a_single_layer(head_outputs[0],self.cfg.SCORE_THRESH_TEST)
        bboxes, labels, nms_indexs, lens = odl.batch_nms_wrapper(bboxes, clses, length, confidence=None,
                                  nms=nms,
                                  k=self.max_detections_per_image,
                                  sort=True)
        scores = wmlt.batch_gather(scores,nms_indexs)

        outdata = {RD_BOXES:bboxes,RD_LABELS:labels,RD_PROBABILITY:scores,RD_LENGTH:lens}
        if global_cfg.GLOBAL.SUMMARY_LEVEL<=SummaryLevel.DEBUG:
            wsummary.detection_image_summary(images=inputs[IMAGE],
                                             boxes=outdata[RD_BOXES],
                                             classes=outdata[RD_LABELS],
                                             lengths=outdata[RD_LENGTH],
                                             scores=outdata[RD_PROBABILITY],
                                             name="CenterNetOutput",
                                             category_index=DataLoader.category_index)
        return outdata
Example #8
 def trans_boxes(self, bboxes, levels, img_size):
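      # Replace each box's width/height with the anchor size configured for its
      # level (levels -> self.rcnn_anchor_boxes), keeping the original box center.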
     B, box_nr = wmlt.combined_static_and_dynamic_shape(levels)
     anchor_boxes_size = tf.tile(self.rcnn_anchor_boxes, [B, 1])
     boxes_size = wmlt.batch_gather(anchor_boxes_size, levels)
     w = boxes_size / tf.to_float(img_size[1])
     h = boxes_size / tf.to_float(img_size[0])
     ymin, xmin, ymax, xmax = tf.unstack(bboxes, axis=-1)
     cy = (ymin + ymax) / 2
     cx = (xmin + xmax) / 2
     ymin = cy - h / 2
     ymax = cy + h / 2
     xmin = cx - w / 2
     xmax = cx + w / 2
     new_boxes = tf.stack([ymin, xmin, ymax, xmax], axis=-1)
     #####
     log_bboxes = tf.concat([bboxes[:, :3], new_boxes[:, :3]], axis=1)
     log_labels = tf.convert_to_tensor([[1, 2, 3, 11, 12, 13]],
                                       dtype=tf.int32)
     log_labels = tf.tile(log_labels, [B, 1])
     wsummary.detection_image_summary(self.batched_inputs[IMAGE],
                                      boxes=log_bboxes,
                                      classes=log_labels,
                                      name="to_anchor_bboxes")
     return new_boxes
Example #9
    def detection_image_summary(inputs,
                                max_boxes_to_draw=20,
                                min_score_thresh=0.2,
                                name="detection_image_summary",
                                max_outputs=3,
                                show_mask=True):
        image = inputs.get('image',None)

        if 'gt_boxes' not in inputs:
            if image is not None:
                wsummary.image_summaries(image,
                                     name=name+"_onlyimg")
            return
        
        boxes = inputs.get('gt_boxes',None)
        classes = inputs.get('gt_labels',None)
        instance_masks = inputs.get('gt_masks',None)
        lengths = inputs.get('gt_length',None)
        if instance_masks is not None and show_mask:
            wsummary.detection_image_summary(image,
                                             boxes,classes,instance_masks=instance_masks,
                                             lengths=lengths,category_index=DataLoader.category_index,
                                             max_boxes_to_draw=max_boxes_to_draw,
                                             min_score_thresh=min_score_thresh,
                                             max_outputs=max_outputs,
                                             name=name)
        else:
            wsummary.detection_image_summary(image,boxes,classes,
                                             lengths=lengths,category_index=DataLoader.category_index,
                                             max_boxes_to_draw=max_boxes_to_draw,
                                             min_score_thresh=min_score_thresh,
                                             max_outputs=max_outputs,
                                             name=name)
        if GT_KEYPOINTS in inputs:
            wsummary.keypoints_image_summary(image,keypoints=inputs[GT_KEYPOINTS],
                                             lengths=lengths,
                                             keypoints_pair=global_cfg.MODEL.KEYPOINTS.POINTS_PAIRS,
                                             name="keypoints")
Example #10
    def inference(self, inputs, head_outputs):
        """
        Arguments:
            inputs: same as CenterNet.forward's batched_inputs
        Returns:
            results:
            RD_BOXES: [B,N,4]
            RD_LABELS: [B,N]
            RD_PROBABILITY: [B,N]
            RD_LENGTH: [B]
        """
        self.inputs = inputs
        all_bboxes = []
        all_scores = []
        all_clses = []
        all_length = []
        img_size = tf.shape(inputs[IMAGE])[1:3]
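        # Collect candidates from every output level; the per-level candidate budget
        # (topk_candidates, k) is divided by 4 for each successive level, with a floor of 4.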
        for i, datas in enumerate(head_outputs):
            num_dets = max(self.topk_candidates // (4**i), 4)
            K = max(self.k // (4**i), 4)
            bboxes, scores, clses, length = self.get_box_in_a_single_layer(
                datas, num_dets, img_size, K)
            all_bboxes.append(bboxes)
            all_scores.append(scores)
            all_clses.append(clses)
            all_length.append(length)

        with tf.name_scope(f"merge_all_boxes"):
            bboxes, _ = wmlt.batch_concat_with_length(all_bboxes, all_length)
            scores, _ = wmlt.batch_concat_with_length(all_scores, all_length)
            clses, length = wmlt.batch_concat_with_length(
                all_clses, all_length)

            nms = functools.partial(tfop.boxes_nms,
                                    threshold=self.nms_threshold,
                                    classes_wise=True,
                                    k=self.max_detections_per_image)
            # Predictions contain no background class; add 1 so that label 0 means background
            clses = clses + 1
            #bboxes = tf.Print(bboxes,["shape",tf.shape(bboxes),tf.shape(clses),length],summarize=100)
            bboxes, labels, nms_indexs, lens = odl.batch_nms_wrapper(
                bboxes,
                clses,
                length,
                confidence=None,
                nms=nms,
                k=self.max_detections_per_image,
                sort=True)
            scores = wmlt.batch_gather(scores, nms_indexs)
        #labels = clses+1
        #lens = length

        outdata = {
            RD_BOXES: bboxes,
            RD_LABELS: labels,
            RD_PROBABILITY: scores,
            RD_LENGTH: lens
        }
        if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
            wsummary.detection_image_summary(
                images=inputs[IMAGE],
                boxes=outdata[RD_BOXES],
                classes=outdata[RD_LABELS],
                lengths=outdata[RD_LENGTH],
                scores=outdata[RD_PROBABILITY],
                name="CenterNetOutput",
                category_index=DataLoader.category_index)

        return outdata
Example #11
    def losses(self):
        """
        Args:
            For `gt_classes` and `gt_anchors_deltas` parameters, see
                :meth:`FCOSGIou.get_ground_truth`.
            Their shapes are (N, R) and (N, R, 4), respectively, where R is
            the total number of anchors across levels, i.e. sum(Hi x Wi x A)
            For `pred_class_logits` and `pred_anchor_deltas`, see
                :meth:`FCOSGIouHead.forward`.

        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a scalar tensor
                storing the loss. Used during training only. The dict keys are:
                "loss_cls" and "loss_box_reg"
        """

        assert len(self.pred_logits[0].get_shape()) == 4, "error logits dim"

        gt_results = self._get_ground_truth()
        loss_cls_list = []
        loss_regression_list = []
        loss_center_ness_list = []
        total_num_foreground = []

        img_size = tf.shape(self.batched_inputs[IMAGE])[1:3]

        for i, gt_results_item in enumerate(gt_results):
            gt_classes = gt_results_item['g_classes']
            gt_boxes = gt_results_item['g_boxes']
            g_center_ness = gt_results_item['g_center_ness']
            pred_class_logits = self.pred_logits[i]
            pred_regression = self.pred_regression[i]
            pred_center_ness = self.pred_center_ness[i]

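            # Label 0 is background; everything else counts as foreground.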
            foreground_idxs = (gt_classes > 0)
            num_foreground = tf.reduce_sum(tf.cast(foreground_idxs, tf.int32))
            total_num_foreground.append(num_foreground)

            gt_classes_target = tf.one_hot(gt_classes,
                                           depth=self.num_classes + 1)
            gt_classes_target = gt_classes_target[..., 1:]

            #
            pred_center_ness = tf.expand_dims(pred_center_ness, axis=-1)
            wsummary.histogram_or_scalar(pred_center_ness, "center_ness")
            # logits loss
            loss_cls = tf.reduce_sum(
                wnn.sigmoid_cross_entropy_with_logits_FL(
                    labels=gt_classes_target,
                    logits=pred_class_logits,
                    alpha=self.focal_loss_alpha,
                    gamma=self.focal_loss_gamma))

            # regression loss
            pred_boxes = self.box2box_transform.apply_deltas(
                regression=pred_regression, img_size=img_size)
            if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG and \
                    gt_classes.get_shape().as_list()[0] > 1:
                log_boxes = self.box2box_transform.apply_deltas(
                    regression=gt_results_item['g_regression'],
                    img_size=img_size)
                log_boxes = odbox.tfabsolutely_boxes_to_relative_boxes(
                    log_boxes, width=img_size[1], height=img_size[0])
                boxes1 = tf.reshape(log_boxes[1:2], [1, -1, 4])
                wsummary.detection_image_summary(
                    images=self.batched_inputs[IMAGE][1:2],
                    boxes=boxes1,
                    name="FCOSGIou_decode_test")
            pred_center_ness = tf.boolean_mask(pred_center_ness,
                                               foreground_idxs)
            g_center_ness = tf.boolean_mask(g_center_ness, foreground_idxs)
            pred_boxes = tf.boolean_mask(pred_boxes, foreground_idxs)
            gt_boxes = tf.boolean_mask(gt_boxes, foreground_idxs)
            wsummary.histogram_or_scalar(pred_center_ness, "center_ness_pos")
            reg_loss_sum = (1.0 - odl.giou(pred_boxes, gt_boxes))
            wmlt.variable_summaries_v2(reg_loss_sum, f"giou_loss{i}")
            pred_center_ness = tf.squeeze(pred_center_ness, axis=-1)
            reg_norm = tf.reduce_sum(g_center_ness) + 1e-5
            reg_loss_sum = reg_loss_sum * g_center_ness
            wmlt.variable_summaries_v2(reg_loss_sum, f"loss_sum{i}")
            loss_box_reg = tf.reduce_sum(reg_loss_sum) * 300 / reg_norm
            wmlt.variable_summaries_v2(loss_box_reg, f"box_reg_loss_{i}")

            loss_center_ness = 0.5 * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=g_center_ness, logits=pred_center_ness)
            loss_center_ness = tf.reduce_sum(loss_center_ness) * 0.1
            wmlt.variable_summaries_v2(loss_center_ness,
                                       f"center_ness_loss{i}")

            loss_cls_list.append(loss_cls)
            loss_regression_list.append(loss_box_reg)
            loss_center_ness_list.append(loss_center_ness)

        total_num_foreground = tf.to_float(
            tf.maximum(tf.add_n(total_num_foreground), 1))
        return {
            "fcos_loss_cls":
            tf.add_n(loss_cls_list) / total_num_foreground,
            "fcos_loss_center_ness":
            tf.add_n(loss_center_ness_list) / total_num_foreground,
            "fcos_loss_box_reg":
            tf.add_n(loss_regression_list) / total_num_foreground
        }
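
For context, odl.giou above is assumed to return the generalized IoU of matched box pairs (boxes given as [ymin,xmin,ymax,xmax], as elsewhere in these examples), so that 1.0 - giou is the per-box regression loss. A minimal sketch under that assumption:

    import tensorflow as tf

    def giou(boxes1, boxes2):
        # boxes: [...,4] in (ymin, xmin, ymax, xmax); returns GIoU in [-1, 1]
        ymin1, xmin1, ymax1, xmax1 = tf.unstack(boxes1, axis=-1)
        ymin2, xmin2, ymax2, xmax2 = tf.unstack(boxes2, axis=-1)
        area1 = (ymax1 - ymin1) * (xmax1 - xmin1)
        area2 = (ymax2 - ymin2) * (xmax2 - xmin2)
        ih = tf.maximum(tf.minimum(ymax1, ymax2) - tf.maximum(ymin1, ymin2), 0.0)
        iw = tf.maximum(tf.minimum(xmax1, xmax2) - tf.maximum(xmin1, xmin2), 0.0)
        inter = ih * iw
        union = area1 + area2 - inter
        iou = inter / (union + 1e-8)
        # area of the smallest axis-aligned box enclosing both boxes
        eh = tf.maximum(ymax1, ymax2) - tf.minimum(ymin1, ymin2)
        ew = tf.maximum(xmax1, xmax2) - tf.minimum(xmin1, xmin2)
        enclose = eh * ew
        return iou - (enclose - union) / (enclose + 1e-8)
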
Example #12
def mask_rcnn_loss(inputs,
                   pred_mask_logits,
                   proposals: EncodedData,
                   fg_selection_mask,
                   log=True):
    '''

    :param inputs: inputs[GT_MASKS] [batch_size,N,H,W]
    :param pred_mask_logits: [Y,H,W,C], C==1 if cls_agnostic_mask else num_classes;
        H,W is the size of the mask, not its position in the original image
    :param proposals: proposals.indices: [batch_size,M], proposals.boxes: [batch_size,M,4],
        proposals.gt_object_logits: [batch_size,M]
    :param fg_selection_mask: [X]
        X = batch_size*M
        Y = tf.reduce_sum(fg_selection_mask)
    :return:
    '''
    cls_agnostic_mask = pred_mask_logits.get_shape().as_list()[-1] == 1
    total_num_masks, mask_H, mask_W, C = wmlt.combined_static_and_dynamic_shape(
        pred_mask_logits)
    assert mask_H == mask_W, "Mask prediction must be square!"

    gt_masks = inputs[GT_MASKS]  #[batch_size,N,H,W]

    with tf.device("/cpu:0"):
        #When the input image resolution is high this step can use too much GPU memory, so run it on the CPU
        batch_size, X, H, W = wmlt.combined_static_and_dynamic_shape(gt_masks)
        #Background proposals are included with index -1; tf.nn.relu clamps them to 0
        #so the gather below stays valid, and fg_selection_mask removes them afterwards
        gt_masks = tf.reshape(gt_masks, [batch_size * X, H, W])
        indices = btf.twod_indexs_to_oned_indexs(tf.nn.relu(proposals.indices),
                                                 depth=X)
        indices = tf.boolean_mask(indices, fg_selection_mask)
        gt_masks = tf.gather(gt_masks, indices)

    boxes = proposals.boxes
    batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(boxes)
    boxes = tf.reshape(boxes, [batch_size * box_nr, box_dim])
    boxes = tf.boolean_mask(boxes, fg_selection_mask)

    with tf.device("/cpu:0"):
        #Run on the CPU for the same GPU-memory reason as above
        gt_masks = tf.expand_dims(gt_masks, axis=-1)
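        # Crop the full-image GT masks with the proposal boxes and resize them to the
        # mask head's output resolution so they align with pred_mask_logits.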
        croped_masks_gt_masks = wmlt.tf_crop_and_resize(
            gt_masks, boxes, [mask_H, mask_W])

    if not cls_agnostic_mask:
        gt_classes = proposals.gt_object_logits
        gt_classes = tf.reshape(gt_classes, [-1])
        gt_classes = tf.boolean_mask(gt_classes, fg_selection_mask)
        pred_mask_logits = tf.transpose(pred_mask_logits, [0, 3, 1, 2])
        pred_mask_logits = wmlt.batch_gather(pred_mask_logits,
                                             gt_classes - 1)  #predictions do not include the background class
        pred_mask_logits = tf.expand_dims(pred_mask_logits, axis=-1)

    if log and config.global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        with tf.device(":/cpu:0"):
            with tf.name_scope("mask_loss_summary"):
                pmasks_2d = tf.reshape(fg_selection_mask, [batch_size, box_nr])
                boxes_3d = tf.expand_dims(boxes, axis=1)
                wsummary.positive_box_on_images_summary(inputs[IMAGE],
                                                        proposals.boxes,
                                                        pmasks=pmasks_2d)
                image = wmlt.select_image_by_mask(inputs[IMAGE], pmasks_2d)
                t_gt_masks = tf.expand_dims(tf.squeeze(gt_masks, axis=-1),
                                            axis=1)
                wsummary.detection_image_summary(
                    images=image,
                    boxes=boxes_3d,
                    instance_masks=t_gt_masks,
                    name="mask_and_boxes_in_mask_loss")
                log_mask = gt_masks
                log_mask = ivis.draw_detection_image_summary(
                    log_mask, boxes=tf.expand_dims(boxes, axis=1))
                log_mask = wmli.concat_images(
                    [log_mask, croped_masks_gt_masks])
                wmlt.image_summaries(log_mask, "mask", max_outputs=3)

                log_mask = wmli.concat_images(
                    [gt_masks,
                     tf.cast(pred_mask_logits > 0.5, tf.float32)])
                wmlt.image_summaries(log_mask, "gt_vs_pred", max_outputs=3)
    mask_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=croped_masks_gt_masks, logits=pred_mask_logits)
    mask_loss = btf.safe_reduce_mean(mask_loss)

    return mask_loss
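
btf.safe_reduce_mean above is assumed to behave like tf.reduce_mean but without producing NaN when no foreground proposal was selected (i.e. the loss tensor is empty). A minimal sketch under that assumption:

    import tensorflow as tf

    def safe_reduce_mean(x):
        # Mean that returns 0 instead of NaN for an empty tensor
        # (e.g. a batch that contains no foreground proposals).
        count = tf.maximum(tf.cast(tf.size(x), tf.float32), 1.0)
        return tf.reduce_sum(x) / count
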
Example #13
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (B,H, W,C) format.
                * instances (optional): groundtruth :class:`Instances`
                * proposals (optional): :class:`Instances`, precomputed proposals.

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            results: dict with RD_BOXES, RD_LABELS, RD_PROBABILITY and RD_LENGTH
            losses: dict mapping loss names to scalar tensors, used during training only
        """
        if not self.is_training:
            return self.inference(batched_inputs)

        batched_inputs = self.preprocess_image(batched_inputs)

        '''
        Use the backbone to produce a feature map, e.g. ResNet's Res4 (stride=16).
        '''
        features = self.backbone(batched_inputs)
        if self.roi_heads_backbone is not None:
            roi_features = self.roi_heads_backbone(batched_inputs)
            pg_features = features
        else:
            if isinstance(features,(list,tuple)):
                pg_features = features[0]
                roi_features = features[1]
            else:
                pg_features = features
                roi_features = features

        if self.proposal_generator:
            proposals, proposal_losses = self.proposal_generator(batched_inputs, pg_features)
        else:
            assert "proposals" in batched_inputs[0]
            proposals = {"proposal_boxes":batched_inputs["proposals"]}
            proposal_losses = {}

        results, detector_losses = self.roi_heads(batched_inputs, roi_features, proposals)

        if len(results)>0:
            wsummary.detection_image_summary(images=batched_inputs[IMAGE],
                                         boxes=results[RD_BOXES], classes=results[RD_LABELS],
                                         lengths=results[RD_LENGTH],
                                         scores=results[RD_PROBABILITY],
                                         name="RCNN_result",
                                         category_index=DataLoader.category_index)

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return results,losses
Example #14
    def inference(self, batched_inputs, detected_instances=None, do_postprocess=True):
        """
        Run inference on the given inputs.

        Args:
            batched_inputs (list[dict]): same as in :meth:`forward`
            detected_instances (None or list[Instances]): if not None, it
                contains an `Instances` object per image. The `Instances`
                object contains "pred_boxes" and "pred_classes" which are
                known boxes in the image.
                The inference will then skip the detection of bounding boxes,
                and only predict other per-ROI outputs.
            do_postprocess (bool): whether to apply post-processing on the outputs.

        Returns:
            same as in :meth:`forward`.
        """
        assert not self.is_training

        batched_inputs = self.preprocess_image(batched_inputs)
        features = self.backbone(batched_inputs)
        if self.roi_heads_backbone is not None:
            roi_features = self.roi_heads_backbone(batched_inputs)
            pg_features = features
        else:
            if isinstance(features,(list,tuple)):
                pg_features = features[0]
                roi_features = features[1]
            else:
                pg_features = features
                roi_features = features

        if detected_instances is None:
            if self.proposal_generator:
                proposals, _ = self.proposal_generator(batched_inputs, pg_features)
            else:
                assert "proposals" in batched_inputs[0]
                proposals = [x["proposals"].to(self.device) for x in batched_inputs]

            results, _ = self.roi_heads(batched_inputs, roi_features, proposals)
        else:
            detected_instances = [x.to(self.device) for x in detected_instances]
            results = self.roi_heads.forward_with_given_boxes(roi_features, detected_instances)
        instance_masks = None if not self.cfg.MODEL.MASK_ON else results.get(RD_MASKS,None)
        if instance_masks is not None:
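            # Binarize the predicted mask probabilities and paste each ROI mask back
            # into full-image coordinates for the image summaries below.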
            shape = btf.combined_static_and_dynamic_shape(batched_inputs[IMAGE])
            instance_masks = tf.cast(instance_masks>0.5,tf.float32)
            instance_masks = ivs.batch_tf_get_fullsize_mask(boxes=results[RD_BOXES],
                                                   masks=instance_masks,
                                                   size=shape[1:3]
                                                   )
        wsummary.detection_image_summary(images=batched_inputs[IMAGE],
                                         boxes=results[RD_BOXES],classes=results[RD_LABELS],
                                         lengths=results[RD_LENGTH],
                                         scores=results[RD_PROBABILITY],
                                         instance_masks=instance_masks,name="RCNN_result",
                                         category_index=DataLoader.category_index)

        if instance_masks is not None:
            wsummary.detection_image_summary(images=tf.zeros_like(batched_inputs[IMAGE]),
                                             boxes=results[RD_BOXES],classes=results[RD_LABELS],
                                             lengths=results[RD_LENGTH],
                                             instance_masks=instance_masks,
                                             name="RCNN_Mask_result",
                                             category_index=DataLoader.category_index)

        if do_postprocess:
            return self._postprocess(results, batched_inputs),None
        else:
            return results,None
Example #15
    def forward(self, inputs, features):

        features = [features[f] for f in self.in_features]

        gt_boxes = inputs.get(GT_BOXES, None)
        #gt_labels = inputs.gt_labels
        gt_length = inputs.get(GT_LENGTH, None)

        pred_objectness_logits, pred_anchor_deltas = self.rpn_head(
            inputs, features)
        anchors = self.rpn_head.anchor_generator(inputs, features)
        self.anchors_num_per_level = [
            wmlt.combined_static_and_dynamic_shape(x)[0] for x in anchors
        ]
        outputs = build_outputs(self.cfg.MODEL.RPN.OUTPUTS,
                                self.box2box_transform,
                                self.anchor_matcher,
                                self.batch_size_per_image,
                                self.positive_fraction,
                                pred_objectness_logits,
                                pred_anchor_deltas,
                                anchors,
                                gt_boxes,
                                gt_length=gt_length)
        if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
            outputs.inputs = inputs

        if self.is_training:
            losses = {
                k: v * self.loss_weight
                for k, v in outputs.losses().items()
            }
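            # No score threshold is applied to the proposals during training.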
            rpn_threshold = 0.0
        else:
            rpn_threshold = self.cfg.MODEL.PROPOSAL_GENERATOR.SCORE_THRESH_TEST
            losses = {}

        # Find the top proposals by applying NMS and removing boxes that
        # are too small. The proposals are treated as fixed for approximate
        # joint training with roi heads. This approach ignores the derivative
        # w.r.t. the proposal boxes’ coordinates that are also network
        # responses, so is approximate.
        pre_nms_topk_max_per_layer = self.cfg.MODEL.RPN.PRE_NMS_TOPK_MAX_PER_LAYER
        proposals, logits = find_top_rpn_proposals(
            outputs.predict_proposals(),
            outputs.predict_objectness_logits(),
            self.nms_thresh,
            self.pre_nms_topk[self.is_training],
            self.post_nms_topk[self.is_training],
            self.anchors_num_per_level,
            score_threshold=rpn_threshold,
            is_training=self.is_training,
            pre_nms_topk_max_per_layer=pre_nms_topk_max_per_layer)
        if self.cfg.MODEL.RPN.SORT_RESULTS:
            with tf.name_scope("sort_rpn_results"):

                def fn(bboxes, keys):
                    N = wmlt.combined_static_and_dynamic_shape(keys)
                    new_keys, indices = tf.nn.top_k(keys, k=N[0])
                    bboxes = tf.gather(bboxes, indices)
                    return [bboxes, new_keys]  # keep logits aligned with the re-ordered boxes

                proposals, logits = tf.map_fn(lambda x: fn(x[0], x[1]),
                                              elems=[proposals, logits],
                                              back_prop=False)

        outdata = {PD_BOXES: proposals, PD_PROBABILITY: tf.nn.sigmoid(logits)}
        wsummary.detection_image_summary(images=inputs[IMAGE],
                                         boxes=outdata[PD_BOXES],
                                         name="rpn/proposals")

        return outdata, losses
Example #16
    def forward(self, batched_inputs, features):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (H, W, C) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used during training only.
        """
        if len(self.in_features) == 0:
            print(
                f"Error no input features for retinanet, use all features {features.keys()}"
            )
            features = list(features.values())
        else:
            features = [features[f] for f in self.in_features]
        pred_logits, pred_regression, pred_center_ness = self.head(features)
        gt_boxes = batched_inputs[GT_BOXES]
        gt_length = batched_inputs[GT_LENGTH]
        gt_labels = batched_inputs[GT_LABELS]

        outputs = build_outputs(
            name=self.cfg.MODEL.FCOSPG.OUTPUTS,
            cfg=self.cfg.MODEL.FCOSPG,
            parent=self,
            box2box_transform=self.box2box_transform,
            pred_logits=pred_logits,
            pred_regression=pred_regression,
            pred_center_ness=pred_center_ness,
            gt_boxes=gt_boxes,
            gt_labels=gt_labels,
            gt_length=gt_length,
            batched_inputs=batched_inputs,
            max_detections_per_image=self.cfg.TEST.DETECTIONS_PER_IMAGE,
        )

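        # Reuse the FCOS-style detections directly as the proposal boxes/scores
        # passed on to the next stage.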
        results = outputs.inference(inputs=batched_inputs,
                                    box_cls=pred_logits,
                                    box_regression=pred_regression,
                                    center_ness=pred_center_ness)
        losses = {}
        if self.is_training:
            _losses = outputs.losses()
            for k, v in _losses.items():
                losses["pg_" + k] = v
        outdata = {
            PD_BOXES: results[RD_BOXES],
            PD_PROBABILITY: results[RD_PROBABILITY]
        }
        wsummary.detection_image_summary(images=batched_inputs[IMAGE],
                                         boxes=outdata[PD_BOXES],
                                         name="fcospg/proposals")

        return outdata, losses