Example #1
    def visualize_training(self, batched_inputs, results):
        """
        A function used to visualize ground truth images and final network predictions.
        It shows ground truth bounding boxes on the original image and up to 20
        predicted object bounding boxes on the original image.

        Args:
            batched_inputs (list): a list that contains input to the model.
            results (List[Instances]): a list of #images elements.
        """
        from detectron2.utils.visualizer import Visualizer

        assert len(batched_inputs) == len(
            results), "Cannot visualize inputs and results of different sizes"
        storage = get_event_storage()
        max_boxes = 20

        image_index = 0  # only visualize a single image
        img = batched_inputs[image_index]["image"].cpu().numpy()
        assert img.shape[0] == 3, "Images should have 3 channels."
        if self.input_format == "BGR":
            img = img[::-1, :, :]
        img = img.transpose(1, 2, 0)
        v_gt = Visualizer(img, None)
        v_gt = v_gt.overlay_instances(
            boxes=batched_inputs[image_index]["instances"].gt_boxes)
        anno_img = v_gt.get_image()
        processed_results = detector_postprocess(results[image_index],
                                                 img.shape[0], img.shape[1])
        predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy()

        v_pred = Visualizer(img, None)
        v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes])
        prop_img = v_pred.get_image()
        vis_img = np.vstack((anno_img, prop_img))
        vis_img = vis_img.transpose(2, 0, 1)
        vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results"
        storage.put_image(vis_name, vis_img)
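Every snippet in this listing calls `get_event_storage()`, which only works while an `EventStorage` is active (detectron2's training loop opens one around each run). Below is a minimal sketch of driving the same logging API outside a trainer; the scalar and image names are made up for illustration.

import numpy as np
from detectron2.utils.events import EventStorage, get_event_storage

with EventStorage(start_iter=0) as storage:
    # Inside the context, get_event_storage() returns this storage object.
    assert get_event_storage() is storage
    storage.put_scalar("demo/loss", 0.5)
    # put_image expects a (3, H, W) RGB array, which is why the method above
    # transposes the stacked visualization to channel-first before logging.
    storage.put_image("demo/image", np.zeros((3, 64, 64), dtype=np.uint8))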
Example #2
    def _log_accuracy(self):
        """
        Log the accuracy metrics to EventStorage.
        """
        num_instances = self.gt_classes.numel()
        pred_classes = self.pred_class_logits.argmax(dim=1)
        bg_class_ind = self.pred_class_logits.shape[1] - 1

        fg_inds = (self.gt_classes >= 0) & (self.gt_classes < bg_class_ind)
        num_fg = fg_inds.nonzero().numel()
        fg_gt_classes = self.gt_classes[fg_inds]
        fg_pred_classes = pred_classes[fg_inds]

        num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel()
        num_accurate = (pred_classes == self.gt_classes).nonzero().numel()
        fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel()

        storage = get_event_storage()
        storage.put_scalar("fast_rcnn/cls_accuracy", num_accurate / num_instances)
        if num_fg > 0:
            storage.put_scalar("fast_rcnn/fg_cls_accuracy", fg_num_accurate / num_fg)
            storage.put_scalar("fast_rcnn/false_negative", num_false_negative / num_fg)
Example #3
    def losses(self):
        """
        Return the losses from a set of RPN predictions and their associated ground-truth.

        Returns:
            dict[loss name -> loss value]: A dict mapping from loss name to loss value.
                Loss names are: `loss_rpn_cls` for objectness classification and
                `loss_rpn_loc` for proposal localization.
        """
        gt_labels = torch.stack(self.gt_labels)
        anchors = self.anchors[0].cat(self.anchors).tensor  # Ax(4 or 5)
        gt_anchor_deltas = [
            self.box2box_transform.get_deltas(anchors, k)
            for k in self.gt_boxes
        ]
        gt_anchor_deltas = torch.stack(gt_anchor_deltas)

        # Log the number of positive/negative anchors per-image that's used in training
        num_pos_anchors = (gt_labels == 1).sum().item()
        num_neg_anchors = (gt_labels == 0).sum().item()
        storage = get_event_storage()
        storage.put_scalar("rpn/num_pos_anchors",
                           num_pos_anchors / self.num_images)
        storage.put_scalar("rpn/num_neg_anchors",
                           num_neg_anchors / self.num_images)

        objectness_loss, localization_loss = rpn_losses(
            gt_labels,
            gt_anchor_deltas,
            # concat on the Hi*Wi*A dimension
            cat(self.pred_objectness_logits, dim=1),
            cat(self.pred_anchor_deltas, dim=1),
            self.smooth_l1_beta,
        )
        normalizer = self.batch_size_per_image * self.num_images
        return {
            "loss_rpn_cls": objectness_loss / normalizer,
            "loss_rpn_loc": localization_loss / normalizer,
        }
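The returned losses are sums over the sampled anchors divided by `batch_size_per_image * num_images`, so the logged values are per-sampled-anchor averages. A short illustration with hypothetical numbers:

# Hypothetical values, for illustration only.
batch_size_per_image = 256   # anchors sampled per image by the RPN sampler
num_images = 2               # images in the batch
normalizer = batch_size_per_image * num_images  # 512 sampled anchors in total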
Example #4
    def __init__(self,
                 images,
                 pred_logits,
                 pred_offsets,
                 gt_sizes=None,
                 strides=None):
        self.image_sizes = images.image_sizes
        self.pred_logits = pred_logits
        self.pred_offsets = pred_offsets
        self.pred_coordinates = [
            offsets2coordinates(offset, images.tensor.shape[-2:])
            for offset in pred_offsets
        ]

        device = self.pred_logits[0].device
        self.gt_sizes = torch.sqrt(
            torch.pow(gt_sizes.tensor.to(device), 2).sum(1))
        self.strides = strides

        self.num_feature_maps = len(pred_logits)

        storage = get_event_storage()
        storage.put_image("sizes", self.gt_sizes[0:1] / 512)
Example #5
    def visualize_training(self, batched_inputs, proposals):
        """
        A function used to visualize images and proposals. It shows ground truth
        bounding boxes on the original image and up to 20 predicted object
        proposals on the original image. Users can implement different
        visualization functions for different models.

        Args:
            batched_inputs (list): a list that contains input to the model.
            proposals (list): a list that contains predicted proposals. Both
                batched_inputs and proposals should have the same length.
        """
        from detectron2.utils.visualizer import Visualizer

        storage = get_event_storage()
        max_vis_prop = 20

        for input, prop in zip(batched_inputs, proposals):
            img = input["image"].cpu().numpy()
            assert img.shape[0] == 3, "Images should have 3 channels."
            if self.input_format == "BGR":
                img = img[::-1, :, :]
            img = img.transpose(1, 2, 0)
            v_gt = Visualizer(img, None)
            v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes)
            anno_img = v_gt.get_image()
            box_size = min(len(prop.proposal_boxes), max_vis_prop)
            v_pred = Visualizer(img, None)
            v_pred = v_pred.overlay_instances(
                boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy()
            )
            prop_img = v_pred.get_image()
            vis_img = np.concatenate((anno_img, prop_img), axis=1)
            vis_img = vis_img.transpose(2, 0, 1)
            vis_name = "Left: GT bounding boxes;  Right: Predicted proposals"
            storage.put_image(vis_name, vis_img)
            break  # only visualize one image in a batch
Example #6
    def forward(self, batched_inputs):
        if not self.training:
            return self.inference(batched_inputs)
        images = self.preprocess_image(batched_inputs)
        gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
        for i in range(len(gt_instances)):
            dataset_source = batched_inputs[i]['dataset_source']
            gt_instances[i]._dataset_source = dataset_source

        features = self.backbone(images.tensor)  # #lvl
        proposals, proposal_losses = self.proposal_generator(
            images, features, gt_instances)

        _, detector_losses = self.roi_heads(images, features, proposals,
                                            gt_instances)
        if self.vis_period > 0:
            storage = get_event_storage()
            if storage.iter % self.vis_period == 0:
                self.visualize_training(batched_inputs, proposals)

        losses = {}
        losses.update(proposal_losses)
        losses.update(detector_losses)
        return losses
Example #7
    def get_clustering_loss(self, input_features, proposals):
        if not self.enable_clustering:
            return 0

        storage = get_event_storage()
        c_loss = 0
        if storage.iter == self.clustering_start_iter:
            items = self.feature_store.retrieve(-1)
            for index, item in enumerate(items):
                if len(item) == 0:
                    self.means[index] = None
                else:
                    mu = torch.tensor(item).mean(dim=0)
                    self.means[index] = mu
            c_loss = self.clstr_loss_l2_cdist(input_features, proposals)
            # Freeze the parameters when clustering starts
            # for param in self.ae_model.parameters():
            #     param.requires_grad = False
        elif storage.iter > self.clustering_start_iter:
            if storage.iter % self.clustering_update_mu_iter == 0:
                # Compute new MUs
                items = self.feature_store.retrieve(-1)
                new_means = [None for _ in range(self.num_classes + 1)]
                for index, item in enumerate(items):
                    if len(item) == 0:
                        new_means[index] = None
                    else:
                        new_means[index] = torch.tensor(item).mean(dim=0)
                # Update the MUs
                for i, mean in enumerate(self.means):
                    if mean is not None and new_means[i] is not None:
                        self.means[i] = self.clustering_momentum * mean + \
                                        (1 - self.clustering_momentum) * new_means[i]

            c_loss = self.clstr_loss_l2_cdist(input_features, proposals)
        return c_loss
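The mean update in the `elif` branch is a plain exponential moving average of each stored class centroid. A hedged sketch with dummy values (the momentum and feature size below are hypothetical):

import torch

clustering_momentum = 0.99
old_mean = torch.zeros(8)    # previously stored class mean
batch_mean = torch.ones(8)   # mean recomputed from the feature store
# Same update rule as self.means[i] above.
new_mean = clustering_momentum * old_mean + (1 - clustering_momentum) * batch_mean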
Example #8
    def forward(self, batched_inputs: Tuple[Dict[str, Tensor]]):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances: Instances

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
        Returns:
            in training, dict[str: Tensor]:
                mapping from a named loss to a tensor storing the loss. Used during training only.
            in inference, the standard output format, described in :doc:`/tutorials/models`.
        """
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)
        features = [features[f] for f in self.head_in_features]

        anchors = self.anchor_generator(features)
        pred_logits, pred_anchor_deltas = self.head(features)
        # Transpose the Hi*Wi*A dimension to the middle:
        pred_logits = [
            permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits
        ]
        pred_anchor_deltas = [
            permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas
        ]

        if self.training:
            assert not torch.jit.is_scripting(), "Not supported"
            assert "instances" in batched_inputs[
                0], "Instance annotations are missing in training!"
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]

            gt_labels, gt_boxes = self.label_anchors(anchors, gt_instances)
            losses = self.losses(anchors, pred_logits, gt_labels,
                                 pred_anchor_deltas, gt_boxes)

            if self.vis_period > 0:
                storage = get_event_storage()
                if storage.iter % self.vis_period == 0:
                    results = self.inference(anchors, pred_logits,
                                             pred_anchor_deltas,
                                             images.image_sizes)
                    self.visualize_training(batched_inputs, results)

            return losses
        else:
            results = self.inference(anchors, pred_logits, pred_anchor_deltas,
                                     images.image_sizes)
            if torch.jit.is_scripting():
                return results
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
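`permute_to_N_HWA_K` reshapes each per-level head output from (N, A*K, H, W) to (N, H*W*A, K) so that logits and deltas line up with the flattened anchors. The sketch below illustrates that reshape; it is a reconstruction for explanation, not detectron2's exact helper.

import torch

def permute_to_N_HWA_K_sketch(x, K):
    # (N, A*K, H, W) -> (N, H*W*A, K)
    N, _, H, W = x.shape
    x = x.view(N, -1, K, H, W)        # (N, A, K, H, W)
    x = x.permute(0, 3, 4, 1, 2)      # (N, H, W, A, K)
    return x.reshape(N, -1, K)

logits = torch.randn(2, 9 * 80, 32, 32)   # e.g. A=9 anchors, K=80 classes
assert permute_to_N_HWA_K_sketch(logits, 80).shape == (2, 32 * 32 * 9, 80)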
Example #9
    def forward(self, images, features, gt_instances=None):
        """
        Args:
            images (ImageList): input images of length `N`
            features (dict[str: Tensor]): input data as a mapping from feature
                map name to tensor. Axis 0 represents the number of images `N` in
                the input data; axes 1-3 are channels, height, and width, which may
                vary between feature maps (e.g., if a feature pyramid is used).
            gt_instances (list[Instances], optional): a length `N` list of `Instances`s.
                Each `Instances` stores ground-truth instances for the corresponding image.

        Returns:
            proposals: list[Instances]: contains fields "proposal_boxes", "objectness_logits"
            loss: dict[Tensor] or None
        """
        features = [features[f] for f in self.in_features]

        pred_objectness_logits, pred_deltas = self.init_head(features)
        torch.cuda.synchronize()

        point_centers, strides = self.get_center_grid(features)
        point_centers = point_centers.to(pred_deltas[0].device)
        strides = strides.to(pred_deltas[0].device)
        pred_boxes = self.init_head.points2bbox(point_centers, pred_deltas)
        if self.training:
            # (N, H*W*L), (N, H*W*L, 4)
            gt_labels, gt_boxes = self.label_and_sample_points(
                point_centers, gt_instances, strides)
        else:
            gt_labels, gt_boxes = None, None

        outputs = RepPointsGeneratorResult(
            pred_objectness_logits,
            pred_boxes,
            gt_labels,
            gt_boxes
        )

        if self.training:
            losses = {k: v * self.loss_weight for k, v in outputs.losses(strides).items()}
        else:
            losses = {}

        proposals = outputs.predict_proposals()
        logits = outputs.predict_objectness_logits()
        if self.debug:
            storage = get_event_storage()
            start = 0
            for i, f in enumerate(features):
                h, w = f.shape[-2:]
                centers = point_centers[start:start + h * w].view(h, w, 2)
                stride = strides[start:start + h * w].view(h, w)
                storage.put_image("centers_x-%d" % i, (centers[..., 0:1] / centers[..., 0:1].max()).permute(2, 0, 1))
                storage.put_image("centers_y-%d" % i, (centers[..., 1:] / centers[..., 1:].max()).permute(2, 0, 1))
                storage.put_image("strides-%d" % i, (stride[None] / 64).float())

                gt_label = gt_labels[0, start:start + h * w].view(1, h, w)
                storage.put_image("gt-labels-%d" % i, gt_label.float())

                storage.put_image("pred-logits-%d" % i, torch.sigmoid(logits[i][0].view(1, h, w)))

                start += h * w
            # storage.clear_images()

        with torch.no_grad():
            # Find the top proposals by applying NMS and removing boxes that
            # are too small. The proposals are treated as fixed for approximate
            # joint training with roi heads. This approach ignores the derivative
            # w.r.t. the proposal boxes’ coordinates that are also network
            # responses, so is approximate.
            proposals = find_top_rpn_proposals(
                proposals,
                logits,
                images,
                self.nms_thresh,
                self.pre_nms_topk[self.training],
                self.post_nms_topk[self.training],
                self.min_box_side_len,
                self.training,
            )

        return proposals, losses
Example #10
    def losses(self,
               predictions,
               proposals,
               void_predictions,
               void_proposals,
               image_path=None,
               flips=None,
               use_exemplar=False):
        """
        Args:
            predictions: return values of :meth:`forward()`.
            proposals (list[Instances]): proposals that match the features
                that were used to compute predictions.
        """
        if utils.get_rank() == 0:
            storage = get_event_storage()
            storage.put_scalar(
                "exemplar/num_pseudo_gt",
                len(self.pseudo_gt) if self.pseudo_gt is not None else 0)
        scores, proposal_deltas, feature = predictions
        void_scores, _, void_feature = void_predictions

        if len(void_scores) > 0:
            neg_sample = void_scores
            storage = get_event_storage()
            storage.put_scalar("exemplar/num_neg_sample", len(neg_sample))

            void_neg_loss = -torch.log(
                1 - neg_sample.softmax(-1)[:, :self.num_classes - 1] + 1e-8)
            if len(void_neg_loss) > 0:
                void_neg_loss = void_neg_loss.sum() / len(void_neg_loss)
            else:
                void_neg_loss = void_neg_loss.sum()
        else:
            void_neg_loss = scores.sum() * 0

        void_loss = {'loss_void_neg': void_neg_loss}
        if use_exemplar:
            a, b, c = void_predictions
            l = sum([len(x) for x in void_proposals[:-1]])
            self.add_feature(predictions, proposals, (a[:l], b[:l], c[:l]),
                             void_proposals[:-1], image_path[:-1], flips[:-1])
        else:
            self.add_feature(predictions, proposals, void_predictions,
                             void_proposals, image_path, flips)

        frcnn_outputs = FastRCNNOutputs(
            self.box2box_transform,
            scores,
            proposal_deltas,
            proposals,
            self.smooth_l1_beta,
            self.box_reg_loss_type,
            self.box_reg_loss_weight,
            self.label_converter,
            add_unlabeled_class=self.add_unlabeled_class,
            cls_weight=self.cls_weight.weight.view(-1),
            bg_class_ind=self.num_classes - 1)
        losses = frcnn_outputs.losses()
        self.step += 1
        losses.update(void_loss)
        return losses
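The void negative loss penalizes probability mass that a void (unmatched) proposal assigns to any known foreground class, assuming, as the `bg_class_ind` argument above suggests, that index `num_classes - 1` is background. A small reconstruction with dummy scores:

import torch

num_classes = 10
void_scores = torch.randn(5, num_classes)              # 5 void proposals (dummy)
probs = void_scores.softmax(-1)[:, :num_classes - 1]   # known-class probabilities
void_neg_loss = -torch.log(1 - probs + 1e-8)
void_neg_loss = void_neg_loss.sum() / len(void_neg_loss)  # average over proposals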
Example #11
    def add_exemplar(self,
                     exemplar_info,
                     void_features,
                     void_proposals,
                     image_path,
                     flips,
                     dir_name='pseudo_gts'):
        exemplar_features, exemplar_labels, exemplar_length = exemplar_info

        p = image_path[0].split('/')[-1].split('.')[0]
        template = image_path[0].replace(p, '{:012}')
        if self.step % 100 == 0:  # sync multi-gpus
            self.sync_pseudo_gt(template, dir_name)
        if len(exemplar_features) == 0 or len(void_features) == 0:
            if utils.get_rank() == 0:
                storage = get_event_storage()
                storage.put_scalar("exemplar/add_exemplar", 0)
            return None

        boxes = [x.proposal_boxes.tensor for x in void_proposals]
        l = [len(b) for b in boxes]
        sizes = [x._image_size for x in void_proposals]
        cos = get_cos_sim(void_features, exemplar_features)
        th = max(0.01, self.cos_thresh - (0.01 * self.n_pseudo_gt / 200))
        if float(cos.max()) < 1 - th:
            if utils.get_rank() == 0:
                storage = get_event_storage()
                storage.put_scalar("exemplar/add_exemplar", 0)
            return None
        cos = cos.split(l)
        data = []
        cos_log = []
        label_log = []
        new_label = [
            -torch.ones((len(x), ), device=cos[0].device)
            for x in void_proposals
        ]

        for i, (c, bbox, p, s) in enumerate(zip(cos, boxes, image_path,
                                                sizes)):
            H, W = s
            area = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])
            ind = size_condition(area, self.size_opt)
            bbox = bbox[ind]
            nonzero_ind = ind.nonzero()
            if len(bbox) == 0:
                continue
            c = c[ind]

            score, ind = c.view(len(bbox), -1).max(dim=0)
            bbox = bbox[ind]
            cc = score
            labels = exemplar_labels
            nonzero_ind = nonzero_ind[ind]
            ind = cc > 1 - th

            cc = cc[ind]
            bbox = bbox[ind]
            nonzero_ind = nonzero_ind[ind]

            keep = nms(bbox, cc, self.nms_thresh)
            bbox = bbox[keep]
            cc = cc[keep]
            l = labels[keep]
            nonzero_ind = nonzero_ind[keep]
            bbox = bbox.div(torch.as_tensor([[W, H, W, H]],
                                            device=bbox.device))
            if flips[i] == 1:
                bbox[:, 0] = 1 - bbox[:, 0]
                bbox[:, 2] = 1 - bbox[:, 2]
                bbox = torch.index_select(
                    bbox, -1, torch.as_tensor([2, 1, 0, 3],
                                              device=bbox.device))
            labels = l.view(-1, 1).float()
            new_label[i][nonzero_ind] = labels
            path = int(p.split('/')[-1].split('.')[0])
            pa = torch.ones((len(bbox), 1), device=bbox.device) * path
            datum = torch.cat((pa, labels, bbox), dim=-1)
            data.append(datum)
            cos_log.append(cc)
            label_log.append(l)

        if len(data) > 0:
            dir_name = os.path.join(self.output_dir, dir_name)
            data = torch.cat(data)
            self.pseudo_gt = torch.cat((self.pseudo_gt, data))
        if utils.get_rank() == 0:
            storage = get_event_storage()
            storage.put_scalar("exemplar/add_exemplar", len(data))

        return new_label
Example #12
    def clustering(self, image_path=None):
        # sync data
        self.sync_pseudo_gt()
        feature = self.gather(self.feature_memory)
        obj_score = self.gather(self.obj_score_memory)
        paths = self.gather(self.path_memory)
        bbox = self.gather(self.bbox_memory)
        self.feature_memory = []
        self.obj_score_memory = []
        self.path_memory = []
        self.bbox_memory = []

        if utils.get_rank() == 0 and self.cls_weight.weight.sum() < len(
                self.cls_weight.weight):
            ids, centroid, var = clustering(feature,
                                            K=self.num_centroid,
                                            step=self.step,
                                            device=feature.device,
                                            tol=1e-3,
                                            Niter=150)
            count = torch.bincount(ids)
            mean_obj_score = torch.bincount(
                ids, weights=obj_score.to(ids.device)) / (count + 1e-6)

            # top 10 % dense clusters.
            dist_topk_bound = -torch.topk(
                -var.view(-1), k=min(len(mean_obj_score), 13)).values[-1]
            mask = var < dist_topk_bound

            # number of found unknown classes
            cls_weight = sum(self.cls_weight.weight) - self.num_classes

            # high objectness clusters.
            cluster_obj_thresh = min(
                self.cluster_obj_thresh *
                (1 + cls_weight / len(self.cls_weight.weight)), 0.99)
            obj_mask = mean_obj_score.to(mask.device) > cluster_obj_thresh

            mask = torch.logical_and(mask, obj_mask.to(mask.device))
            mask = mask.bool().view(-1)
            ids = ids.long().view(-1)

            paths = paths[mask[ids]]
            bbox = bbox[mask[ids]]
            feature = feature[mask[ids]]
            obj_score = obj_score[mask[ids]]
            ids = ids[mask[ids]]

            centroid = centroid[mask]

            if len(obj_score) > 0:
                obj_thresh = min(self.coupled_obj_thresh, max(obj_score))
            else:
                obj_thresh = self.coupled_obj_thresh

            obj_thresh = obj_thresh + (self.n_pseudo_gt * 0.01 / 100)
            obj_thresh = min(obj_thresh, 0.99)
            idx = obj_score >= obj_thresh
            bbox = bbox[idx]
            feature = feature[idx]
            paths = paths[idx]
            obj_score = obj_score[idx]
            ids = ids[idx]

            feats = []
            boxes = []
            ps = []
            obj_scores = []
            new_ids = []
            cls_weight = sum(self.cls_weight.weight) - self.num_classes

            coupled_cos_thresh = self.coupled_cos_thresh * (
                1 - cls_weight / len(self.cls_weight.weight))
            coupled_cos_thresh = max(coupled_cos_thresh, 0.01)
            for i, l in enumerate(sorted(ids.unique())):
                idx = ids == l
                feat = feature[idx]
                bb = bbox[idx]
                path = paths[idx]
                obj = obj_score[idx]

                cos_sim = get_cos_sim(feat, feat).view(-1)
                cos_dist = 1 - cos_sim
                idx = cos_dist.argsort()

                used = []
                used_path = []
                printer = cos_sim[idx]
                printer = printer[printer <
                                  0.99999]  # eliminate same element pairs
                for v in idx:
                    x, y = v // len(feat), v % len(feat)
                    if cos_dist[v] > coupled_cos_thresh:
                        break

                    if path[x] != path[y] and path[
                            x] not in used_path and path[y] not in used_path:
                        used.append(x)
                        used.append(y)
                        used_path.append(path[x])
                        used_path.append(path[y])

                if len(used) > 0:
                    idx = torch.as_tensor(used, device=feat.device)
                    temp_ids = torch.ones(
                        (len(used), ), device=feat.device) * l
                    feats.append(feat[idx])
                    boxes.append(bb[idx])
                    ps.append(path[idx])
                    obj_scores.append(obj[idx])
                    new_ids.append(temp_ids)
            if len(feats) > 0:
                feature = torch.cat(feats)
                bbox = torch.cat(boxes)
                paths = torch.cat(ps)
                obj_score = torch.cat(obj_scores)
                ids = torch.cat(new_ids)
                cls_weight = self.cls_weight.weight
                start_l = int(cls_weight.sum()
                              ) + self.original_num_classes - self.num_classes
                labels = -ids - 1
                unique_label = labels.unique()
                unique_label = unique_label[:cls_weight.shape[0] -
                                            int(cls_weight.sum())]
                for i, p in enumerate(unique_label):
                    if i + start_l - self.original_num_classes == self.num_centroid:
                        break
                    labels[labels == p] = i + start_l

                idx = labels > 0
                obj_score = obj_score[idx]
                labels = labels[idx]
                paths = paths[idx]
                feature = feature[idx]
                bbox = bbox[idx]

                data = torch.cat(
                    (paths.unsqueeze(1), labels.unsqueeze(1).float(), bbox),
                    dim=-1)
            else:
                data = torch.zeros((0, 6), device=feature.device)
            if image_path is not None and len(data) > 0:
                utils.save_boxes(data, feature.detach(), obj_score.detach(),
                                 image_path, self.pal, self.step,
                                 self.num_classes, self.output_dir)
            size = torch.as_tensor([len(data), len(centroid)],
                                   device=feature.device).float()

            storage = get_event_storage()
            storage.put_scalar("exemplar/obj_th", float(obj_thresh))
            storage.put_scalar("exemplar/cluster_obj_th",
                               float(cluster_obj_thresh))
            storage.put_scalar("exemplar/sel_cluster", int(mask.sum()))
            storage.put_scalar("exemplar/coupled_cos_th",
                               float(coupled_cos_thresh))
            storage.put_scalar("exemplar/new", len(data))
        else:
            size = torch.empty(size=(1, 2), device=feature.device)

        # gather
        if utils.get_world_size() > 1:
            torch.cuda.synchronize()
            dist.broadcast(size, 0)
            if utils.get_rank() > 0:
                data = torch.empty(size=(int(size[0, 0]), 6),
                                   device=feature.device)
            torch.cuda.synchronize()
            dist.broadcast(data, 0)
        l_cls = self.original_num_classes - 1
        l_new = int(data[:, 1].max() - l_cls) if len(data) > 0 else 0

        cls_weight = self.cls_weight.weight.data
        cls_weight[:self.num_classes + l_new] = 1
        self.cls_weight.weight.data = cls_weight

        if self.pseudo_gt is None:
            self.pseudo_gt = data
        else:
            self.pseudo_gt = torch.cat((self.pseudo_gt, data))
        self.n_pseudo_gt = len(self.pseudo_gt)

        # flush
        if utils.get_rank() == 0:
            try:
                torch.save(
                    self.pseudo_gt.cpu(),
                    os.path.join(self.output_dir,
                                 'pseudo_gts/{}.pth'.format(self.step)))
            except:
                pass
Example #13
    def fcos_losses(self, instances):
        num_classes = instances.logits_pred.size(1)
        assert num_classes == self.num_classes

        labels = instances.labels.flatten()
        gt_object = instances.gt_inds

        pos_inds = torch.nonzero(labels != num_classes).squeeze(1)
        neg_inds = torch.nonzero(labels == num_classes).squeeze(1)
        num_pos_local = pos_inds.numel()
        num_gpus = get_world_size()
        total_num_pos = reduce_sum(pos_inds.new_tensor([num_pos_local])).item()
        num_pos_avg = max(total_num_pos / num_gpus, 1.0)

        # prepare one_hot
        class_target = torch.zeros_like(instances.logits_pred)
        class_target[pos_inds, labels[pos_inds]] = 1

        class_loss = sigmoid_focal_loss_jit(
            instances.logits_pred,
            class_target,
            alpha=self.focal_loss_alpha,
            gamma=self.focal_loss_gamma,
            reduction="none",
        )  #/ num_pos_avg

        positive_diff = (
            1 - instances.logits_pred[class_target == 1].sigmoid()).abs()
        negative_diff = (
            0 - instances.logits_pred[class_target == 0].sigmoid()).abs()

        positive_mean = positive_diff.mean().detach()
        positive_std = positive_diff.std().detach()

        negative_mean = negative_diff.mean().detach()
        negative_std = negative_diff.std().detach()

        upper_true_loss = class_loss.flatten()[(class_target == 1).flatten()][
            (positive_diff >
             (positive_mean + positive_std))].sum() / num_pos_avg
        under_true_loss = class_loss.flatten()[(class_target == 1).flatten()][
            (positive_diff <=
             (positive_mean + positive_std))].sum() / num_pos_avg
        upper_false_loss = class_loss.flatten()[(class_target == 0).flatten()][
            (negative_diff >
             (negative_mean + negative_std))].sum() / num_pos_avg
        under_false_loss = class_loss.flatten()[(class_target == 0).flatten()][
            (negative_diff <=
             (negative_mean + negative_std))].sum() / num_pos_avg

        storage = get_event_storage()
        if storage.iter % 20 == 0:
            logger.info(
                "upper_true {}, under_true {} upper_false {} under_false {}".
                format((positive_diff > positive_mean + positive_std).sum(),
                       (positive_diff <= positive_mean + positive_std).sum(),
                       (negative_diff > negative_mean + negative_std).sum(),
                       (negative_diff <= negative_mean + negative_std).sum()))

        instances = instances[pos_inds]
        instances.pos_inds = pos_inds

        #assert (instances.gt_inds.unique() != gt_object.unique()).sum() == 0

        ctrness_targets = compute_ctrness_targets(instances.reg_targets)
        ctrness_targets_sum = ctrness_targets.sum()
        loss_denorm = max(
            reduce_sum(ctrness_targets_sum).item() / num_gpus, 1e-6)
        instances.gt_ctrs = ctrness_targets

        if pos_inds.numel() > 0:
            reg_loss = self.loc_loss_func(instances.reg_pred,
                                          instances.reg_targets,
                                          ctrness_targets) / loss_denorm

            ctrness_loss = torch.nn.MSELoss(reduction="sum")(
                instances.ctrness_pred.sigmoid(),
                ctrness_targets) / num_pos_avg
        else:
            reg_loss = instances.reg_pred.sum() * 0
            ctrness_loss = instances.ctrness_pred.sum() * 0

        losses = {
            "loss_upper_true_cls": upper_true_loss,
            "loss_under_true_cls": under_true_loss,
            "loss_upper_false_cls": upper_false_loss,
            "loss_under_false_cls": under_false_loss,
            "loss_fcos_loc": reg_loss,
            "loss_fcos_ctr": ctrness_loss,
            #"loss_negative_identity_mean": negative_identity_mean_loss,
            #"loss_negative_identity_std": negative_identity_std_loss,
            #"loss_positive_identity": positive_identity_loss,
        }
        extras = {"instances": instances, "loss_denorm": loss_denorm}
        return extras, losses
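The four classification terms above split the focal loss by how far each prediction's probability is from its target, using a per-group mean-plus-std threshold. A small illustration with dummy errors:

import torch

diff = torch.tensor([0.05, 0.10, 0.80, 0.15])  # dummy |target - sigmoid(logit)| values
threshold = diff.mean() + diff.std()
upper_mask = diff > threshold    # contributes to the "upper" loss term
under_mask = diff <= threshold   # contributes to the "under" loss term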
Example #14
    def losses(
        self,
        anchors: List[Boxes],
        pred_objectness_logits: List[torch.Tensor],
        gt_labels: List[torch.Tensor],
        pred_anchor_deltas: List[torch.Tensor],
        gt_boxes: List[torch.Tensor],
    ) -> Dict[str, torch.Tensor]:
        """
        Return the losses from a set of RPN predictions and their associated ground-truth.

        Args:
            anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each
                has shape (Hi*Wi*A, B), where B is box dimension (4 or 5).
            pred_objectness_logits (list[Tensor]): A list of L elements.
                Element i is a tensor of shape (N, Hi*Wi*A) representing
                the predicted objectness logits for all anchors.
            gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`.
            pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a tensor of shape
                (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas" used to transform anchors
                to proposals.
            gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`.

        Returns:
            dict[loss name -> loss value]: A dict mapping from loss name to loss value.
                Loss names are: `loss_rpn_cls` for objectness classification and
                `loss_rpn_loc` for proposal localization.
        """
        num_images = len(gt_labels)
        gt_labels = torch.stack(gt_labels)  # (N, sum(Hi*Wi*Ai))

        # Log the number of positive/negative anchors per-image that's used in training
        pos_mask = gt_labels == 1
        num_pos_anchors = pos_mask.sum().item()
        num_neg_anchors = (gt_labels == 0).sum().item()
        storage = get_event_storage()
        storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images)
        storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images)

        if self.box_reg_loss_type == "smooth_l1":
            anchors = type(anchors[0]).cat(anchors).tensor  # Ax(4 or 5)
            gt_anchor_deltas = [
                self.box2box_transform.get_deltas(anchors, k) for k in gt_boxes
            ]
            gt_anchor_deltas = torch.stack(
                gt_anchor_deltas)  # (N, sum(Hi*Wi*Ai), 4 or 5)
            localization_loss = smooth_l1_loss(
                cat(pred_anchor_deltas, dim=1)[pos_mask],
                gt_anchor_deltas[pos_mask],
                self.smooth_l1_beta,
                reduction="sum",
            )
        elif self.box_reg_loss_type == "giou":
            pred_proposals = self._decode_proposals(anchors,
                                                    pred_anchor_deltas)
            pred_proposals = cat(pred_proposals, dim=1)
            pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1])
            pos_mask = pos_mask.view(-1)
            localization_loss = giou_loss(pred_proposals[pos_mask],
                                          cat(gt_boxes)[pos_mask],
                                          reduction="sum")
        elif self.box_reg_loss_type == "diou":
            anchors = type(anchors[0]).cat(anchors).tensor  # Ax(4 or 5)
            gt_anchor_deltas = [
                self.box2box_transform.get_deltas(anchors, k) for k in gt_boxes
            ]
            gt_anchor_deltas = torch.stack(
                gt_anchor_deltas)  # (N, sum(Hi*Wi*Ai), 4 or 5)
            localization_loss = compute_diou(
                cat(pred_anchor_deltas, dim=1)[pos_mask],
                gt_anchor_deltas[pos_mask], self.box2box_transform.weights,
                self.box2box_transform.scale_clamp)
        # elif self.box_reg_loss_type == "diou_bbox":
        #     pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas)
        #     pred_proposals = cat(pred_proposals, dim=1)
        #     pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1])
        #     pos_mask = pos_mask.view(-1)
        #     localization_loss = giou_loss(
        #         pred_proposals[pos_mask], cat(gt_boxes)[pos_mask]
        #     )
        elif self.box_reg_loss_type == "diou_mmdet":
            pred_proposals = self._decode_proposals(anchors,
                                                    pred_anchor_deltas)
            pred_proposals = cat(pred_proposals, dim=1)
            pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1])
            pos_mask = pos_mask.view(-1)
            localization_loss = compute_diou_mmdet(pred_proposals[pos_mask],
                                                   cat(gt_boxes)[pos_mask])
        elif self.box_reg_loss_type == "ciou_mmdet":
            pred_proposals = self._decode_proposals(anchors,
                                                    pred_anchor_deltas)
            pred_proposals = cat(pred_proposals, dim=1)
            pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1])
            pos_mask = pos_mask.view(-1)
            localization_loss = compute_ciou_mmdet(pred_proposals[pos_mask],
                                                   cat(gt_boxes)[pos_mask])
        else:
            raise ValueError(
                f"Invalid rpn box reg loss type '{self.box_reg_loss_type}'")

        valid_mask = gt_labels >= 0
        objectness_loss = F.binary_cross_entropy_with_logits(
            cat(pred_objectness_logits, dim=1)[valid_mask],
            gt_labels[valid_mask].to(torch.float32),
            reduction="sum",
        )
        normalizer = self.batch_size_per_image * num_images
        losses = {
            "loss_rpn_cls": objectness_loss / normalizer,
            "loss_rpn_loc": localization_loss / normalizer,
        }
        losses = {
            k: v * self.loss_weight.get(k, 1.0)
            for k, v in losses.items()
        }
        return losses
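Several branches above (e.g. "giou", "diou_mmdet") flatten both the decoded proposals and the positive mask so that a single boolean index selects matching rows from each. A minimal sketch of that pattern with dummy tensors:

import torch

pred = torch.randn(2, 5, 4)                  # 2 images, 5 anchors, 4 box coords
pos = torch.zeros(2, 5, dtype=torch.bool)
pos[0, 1] = True
pos[1, 3] = True
positives = pred.view(-1, 4)[pos.view(-1)]   # shape (2, 4): positive anchors only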
Example #15
    def _forward_box(
        self, features: Dict[str, torch.Tensor], proposals: List[Instances]
    ) -> Union[Dict[str, torch.Tensor], List[Instances]]:
        """
        Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`,
            the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument.

        Args:
            features (dict[str, Tensor]): mapping from feature map names to tensor.
                Same as in :meth:`ROIHeads.forward`.
            proposals (list[Instances]): the per-image object proposals with
                their matching ground truth.
                Each has fields "proposal_boxes", and "objectness_logits",
                "gt_classes", "gt_boxes".

        Returns:
            In training, a dict of losses.
            In inference, a list of `Instances`, the predicted instances.
        """
        features = [features[f] for f in self.box_in_features]
        box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])

        objectness_logits = torch.cat([x.objectness_logits + 1 for x in proposals], dim=0)
        box_features = box_features * objectness_logits.view(-1, 1, 1, 1)
        if self.training:
            storage = get_event_storage()
            storage.put_scalar("proposals/objectness_logits+1 mean", objectness_logits.mean())
            storage.put_scalar("proposals/objectness_logits+1 max", objectness_logits.max())
            storage.put_scalar("proposals/objectness_logits+1 min", objectness_logits.min())

        # torch.cuda.empty_cache()

        box_features = self.box_head(box_features)
        predictions = self.box_predictor(box_features, proposals)
        # del box_features

        if self.training:
            losses = self.box_predictor.losses(predictions, proposals, self.gt_classes_img_oh)

            self.pred_class_img_logits = (
                self.box_predictor.predict_probs_img(predictions, proposals).clone().detach()
            )

            prev_pred_scores = predictions[0].detach()
            prev_pred_boxes = [p.proposal_boxes for p in proposals]
            for k in range(self.refine_K):
                suffix = "_r" + str(k)
                targets, target_weights = self.get_pgt(
                    prev_pred_boxes, prev_pred_scores, proposals, suffix
                )

                proposal_append_gt = self.proposal_append_gt
                self.proposal_append_gt = False
                proposals_k, matched_idxs = self.label_and_sample_proposals(
                    proposals, targets, ret_MI=True, suffix=suffix
                )
                self.proposal_append_gt = proposal_append_gt

                proposal_weights = torch.cat(
                    [
                        torch.index_select(target_weight, 0, matched_idx)
                        for target_weight, matched_idx in zip(target_weights, matched_idxs)
                    ],
                    dim=0,
                )

                predictions_k = self.box_refinery[k](box_features)

                losses_k = self.box_refinery[k].losses(predictions_k, proposals_k, proposal_weights)

                prev_pred_scores = self.box_refinery[k].predict_probs(predictions_k, proposals_k)
                prev_pred_boxes = self.box_refinery[k].predict_boxes(predictions_k, proposals_k)
                prev_pred_scores = [
                    prev_pred_score.detach() for prev_pred_score in prev_pred_scores
                ]
                prev_pred_boxes = [prev_pred_box.detach() for prev_pred_box in prev_pred_boxes]

                losses.update(losses_k)
            # proposals is modified in-place below, so losses must be computed first.
            if self.train_on_pred_boxes:
                with torch.no_grad():
                    pred_boxes = self.box_predictor.predict_boxes_for_gt_classes(
                        predictions, proposals
                    )
                    for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes):
                        proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image)
            return losses
        else:
            if self.refine_reg[-1]:
                predictions_k = self.box_refinery[-1](box_features)
                pred_instances, _, all_scores, all_boxes = self.box_refinery[-1].inference(
                    predictions_k, proposals
                )
            else:
                predictions_K = []
                for k in range(self.refine_K):
                    predictions_k = self.box_refinery[k](box_features)
                    predictions_K.append(predictions_k)
                pred_instances, _, all_scores, all_boxes = self.box_refinery[-1].inference(
                    predictions_K, proposals
                )
            return pred_instances, all_scores, all_boxes
Example #16
    def test(cls, cfg, model, mapper_object, evaluators=None):
        """
        Args:
            cfg (CfgNode):
            model (nn.Module):
            evaluators (list[DatasetEvaluator] or None): if None, will call
                :meth:`build_evaluator`. Otherwise, must have the same length as
                `cfg.DATASETS.TEST`.

        Returns:
            dict: a dict of result metrics
        """
        logger = logging.getLogger(__name__)
        if isinstance(evaluators, DatasetEvaluator):
            evaluators = [evaluators]
        if evaluators is not None:
            assert len(
                cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
                    len(cfg.DATASETS.TEST), len(evaluators))

        results = OrderedDict()
        for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
            # if(not isTrackAccuracy):
            # break
            data_loader = cls.build_test_loader(cfg, dataset_name,
                                                mapper_object)
            # When evaluators are passed in as arguments,
            # implicitly assume that evaluators can be created before data_loader.
            if evaluators is not None:
                evaluator = evaluators[idx]
            else:
                try:
                    evaluator = cls.build_evaluator(cfg, dataset_name)
                except NotImplementedError:
                    logger.warning(
                        "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
                        "or implement its `build_evaluator` method.")
                    results[dataset_name] = {}
                    continue

            # if(True): return results
            # return results
            results_i = evaluate.inference_on_dataset(model, data_loader,
                                                      evaluator)

            accuracy_test = round((results_i["accuracy"] * 100), 2)

            storage = get_event_storage()
            storage.put_scalar("accuracy_" + dataset_name,
                               accuracy_test,
                               smoothing_hint=False)

            results[dataset_name] = results_i
            if comm.is_main_process():
                assert isinstance(
                    results_i, dict
                ), "Evaluator must return a dict on the main process. Got {} instead.".format(
                    results_i)
                logger.info("Evaluation results for {} in csv format:".format(
                    dataset_name))
                print_csv_format(results_i)

        if len(results) == 1:
            results = list(results.values())[0]
        return results
Example #17
File: rpn.py Project: hieu28022000/LP_Reg
    def losses(
        self,
        anchors: List[Boxes],
        pred_objectness_logits: List[torch.Tensor],
        gt_labels: List[torch.Tensor],
        pred_anchor_deltas: List[torch.Tensor],
        gt_boxes: List[torch.Tensor],
    ) -> Dict[str, torch.Tensor]:
        """
        Return the losses from a set of RPN predictions and their associated ground-truth.

        Args:
            anchors (list[Boxes or RotatedBoxes]): anchors for each feature map, each
                has shape (Hi*Wi*A, B), where B is box dimension (4 or 5).
            pred_objectness_logits (list[Tensor]): A list of L elements.
                Element i is a tensor of shape (N, Hi*Wi*A) representing
                the predicted objectness logits for all anchors.
            gt_labels (list[Tensor]): Output of :meth:`label_and_sample_anchors`.
            pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a tensor of shape
                (N, Hi*Wi*A, 4 or 5) representing the predicted "deltas" used to transform anchors
                to proposals.
            gt_boxes (list[Tensor]): Output of :meth:`label_and_sample_anchors`.

        Returns:
            dict[loss name -> loss value]: A dict mapping from loss name to loss value.
                Loss names are: `loss_rpn_cls` for objectness classification and
                `loss_rpn_loc` for proposal localization.
        """
        num_images = len(gt_labels)
        gt_labels = torch.stack(gt_labels)  # (N, sum(Hi*Wi*Ai))

        # Log the number of positive/negative anchors per-image that's used in training
        pos_mask = gt_labels == 1
        num_pos_anchors = pos_mask.sum().item()
        num_neg_anchors = (gt_labels == 0).sum().item()
        storage = get_event_storage()
        storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images)
        storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images)

        localization_loss = _dense_box_regression_loss(
            anchors,
            self.box2box_transform,
            pred_anchor_deltas,
            gt_boxes,
            pos_mask,
            box_reg_loss_type=self.box_reg_loss_type,
            smooth_l1_beta=self.smooth_l1_beta,
        )

        valid_mask = gt_labels >= 0
        objectness_loss = F.binary_cross_entropy_with_logits(
            cat(pred_objectness_logits, dim=1)[valid_mask],
            gt_labels[valid_mask].to(torch.float32),
            reduction="sum",
        )
        normalizer = self.batch_size_per_image * num_images
        losses = {
            "loss_rpn_cls": objectness_loss / normalizer,
            "loss_rpn_loc": localization_loss / normalizer,
        }
        losses = {
            k: v * self.loss_weight.get(k, 1.0)
            for k, v in losses.items()
        }
        return losses