Example #1
    def _init_keypoint_head(cls, cfg, input_shape):
        if not cfg.MODEL.KEYPOINT_ON:
            return {}
        # fmt: off
        in_features       = cfg.MODEL.ROI_HEADS.IN_FEATURES
        pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
        pooler_scales     = tuple(1.0 / input_shape[k].stride for k in in_features)  # noqa
        sampling_ratio    = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
        pooler_type       = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE
        # fmt: on

        in_channels = [input_shape[f].channels for f in in_features][0]

        ret = {"keypoint_in_features": in_features}
        ret["keypoint_pooler"] = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )
        ret["keypoint_head"] = build_keypoint_head(
            cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution)
        )
        return ret
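A minimal usage sketch for this from_config-style helper (the "p3" feature name and 256-channel shape are illustrative; assumes detectron2's default config values):

from detectron2.config import get_cfg
from detectron2.layers import ShapeSpec
from detectron2.modeling.roi_heads import StandardROIHeads

cfg = get_cfg()
cfg.MODEL.KEYPOINT_ON = True
cfg.MODEL.ROI_HEADS.IN_FEATURES = ["p3"]
input_shape = {"p3": ShapeSpec(channels=256, stride=8)}
ret = StandardROIHeads._init_keypoint_head(cfg, input_shape)
# ret holds "keypoint_in_features", "keypoint_pooler", and "keypoint_head",
# ready to be merged into the ROI heads' constructor kwargs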
Example #2
    def _init_mask_head(self, cfg, input_shape):
        # fmt: off
        self.mask_on = cfg.MODEL.MASK_ON
        if not self.mask_on:
            return
        self.mask_coarse_in_features = cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES
        self.mask_coarse_side_size = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        self._feature_scales = {
            k: 1.0 / v.stride
            for k, v in input_shape.items()
        }
        # fmt: on

        in_channels = np.sum(
            [input_shape[f].channels for f in self.mask_coarse_in_features])
        self.mask_coarse_head = build_mask_head(
            cfg,
            ShapeSpec(
                channels=in_channels,
                width=self.mask_coarse_side_size,
                height=self.mask_coarse_side_size,
            ),
        )
        self._init_point_head(cfg, input_shape)
Example #3
    def _init_box_head(self, cfg, input_shape):
        # fmt: off
        pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        pooler_scales = tuple(1.0 / input_shape[k].stride
                              for k in self.in_features)
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
        # fmt: on

        # If StandardROIHeads is applied on multiple feature maps (as in FPN),
        # then we share the same predictors and therefore the channel counts must be the same
        in_channels = [input_shape[f].channels for f in self.in_features]
        # Check all channel counts are equal
        assert len(set(in_channels)) == 1, in_channels
        in_channels = in_channels[0]

        assert pooler_type in ["ROIAlignRotated"]

        self.box_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )
        self.box_head = build_box_head(
            cfg,
            ShapeSpec(channels=in_channels,
                      height=pooler_resolution,
                      width=pooler_resolution))

        self.box_predictor = FastRCNNOutputLayers(
            input_size=self.box_head.output_size,
            num_classes=self.num_classes,
            cls_agnostic_bbox_reg=self.cls_agnostic_bbox_reg,
            box_dim=5,
        )
Example #4
    def __init__(self,
                 input_shape,
                 num_classes,
                 cls_agnostic_bbox_reg,
                 box_dim=4):
        """
        Args:
            input_shape (ShapeSpec): shape of the input feature
            num_classes (int): number of foreground classes
            cls_agnostic_bbox_reg (bool): whether to use class-agnostic bbox regression
            box_dim (int): the dimension of bounding boxes.
                Example box dimensions: 4 for regular XYXY boxes and 5 for rotated XYWHA boxes
        """
        super().__init__()
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        input_size = input_shape.channels * (input_shape.width
                                             or 1) * (input_shape.height or 1)
        # The prediction layer for num_classes foreground classes and one background class
        # (hence + 1)
        self.cls_score = Linear(input_size, num_classes + 1)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.constant_(self.cls_score.bias, 0)
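For reference, the flattened input_size computed above, for a typical 7x7 box-head feature (values are illustrative):

from detectron2.layers import ShapeSpec

s = ShapeSpec(channels=256, height=7, width=7)
input_size = s.channels * (s.width or 1) * (s.height or 1)  # 256 * 7 * 7 = 12544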
Example #5
    def _init_point_head(self, cfg, input_shape):
        # fmt: off
        self.mask_point_on = True  # always on
        assert cfg.MODEL.ROI_HEADS.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES
        self.mask_point_in_features             = cfg.MODEL.POINT_HEAD.IN_FEATURES
        self.mask_point_train_num_points        = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS
        # the next two parameters are used in the adaptive subdivision inference procedure
        self.mask_point_subdivision_steps       = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS
        self.mask_point_subdivision_num_points  = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS
        # fmt: on

        in_channels = np.sum([input_shape[f].channels for f in self.mask_point_in_features])
        self.point_head = build_point_head(cfg, ShapeSpec(channels=in_channels, width=1, height=1))
        self.num_params = self.point_head.num_params

        # inference parameters
        self.mask_point_subdivision_init_resolution = int(
            math.sqrt(self.mask_point_subdivision_num_points)
        )
        assert (
            self.mask_point_subdivision_init_resolution
            * self.mask_point_subdivision_init_resolution
            == self.mask_point_subdivision_num_points
        )
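The assertion above requires the point count to be a perfect square; a quick check with a 28 * 28 value (illustrative):

import math

subdivision_num_points = 28 * 28  # e.g. cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS
init_resolution = int(math.sqrt(subdivision_num_points))  # 28
assert init_resolution * init_resolution == subdivision_num_points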
Example #6
    def _init_keypoint_head(self, cfg):
        # fmt: off
        self.keypoint_on                         = cfg.MODEL.KEYPOINT_ON
        if not self.keypoint_on:
            return
        pooler_resolution                        = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
        pooler_scales                            = tuple(1.0 / self.feature_strides[k] for k in self.in_features)  # noqa
        sampling_ratio                           = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
        pooler_type                              = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE
        self.normalize_loss_by_visible_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS  # noqa
        self.keypoint_loss_weight                = cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT
        # fmt: on

        in_channels = [self.feature_channels[f] for f in self.in_features][0]

        self.keypoint_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )
        self.keypoint_head = build_keypoint_head(
            cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution)
        )
Example #7
    def test_keypoint_head_scriptability(self):
        input_shape = ShapeSpec(channels=1024, height=14, width=14)
        keypoint_features = torch.randn(4, 1024, 14, 14)

        image_shapes = [(10, 10), (15, 15)]
        pred_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6], [1, 5, 2, 8]],
                                   dtype=torch.float32)
        pred_instance0 = Instances(image_shapes[0])
        pred_instance0.pred_boxes = Boxes(pred_boxes0)
        pred_boxes1 = torch.tensor([[7, 3, 10, 5]], dtype=torch.float32)
        pred_instance1 = Instances(image_shapes[1])
        pred_instance1.pred_boxes = Boxes(pred_boxes1)

        keypoint_head = KRCNNConvDeconvUpsampleHead(input_shape,
                                                    num_keypoints=17,
                                                    conv_dims=[512,
                                                               512]).eval()
        origin_outputs = keypoint_head(
            keypoint_features, deepcopy([pred_instance0, pred_instance1]))

        fields = {
            "pred_boxes": Boxes,
            "pred_keypoints": torch.Tensor,
            "pred_keypoint_heatmaps": torch.Tensor,
        }
        with patch_instances(fields) as NewInstances:
            script_keypoint_head = torch.jit.script(keypoint_head)
            pred_instance0 = NewInstances.from_instances(pred_instance0)
            pred_instance1 = NewInstances.from_instances(pred_instance1)
            script_outputs = script_keypoint_head(
                keypoint_features, [pred_instance0, pred_instance1])

        for origin_ins, script_ins in zip(origin_outputs, script_outputs):
            assert_instances_allclose(origin_ins,
                                      script_ins.to_instances(),
                                      rtol=0)
Example #8
 def output_shape(self):
     return {
         name: ShapeSpec(channels=self._out_feature_channels[name],
                         stride=self._out_feature_strides[name])
         for name in self._out_features
     }
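A short sketch of how callers typically consume such a mapping, e.g. to derive ROI pooler scales (feature names and strides are illustrative):

from detectron2.layers import ShapeSpec

shapes = {"p2": ShapeSpec(channels=256, stride=4), "p3": ShapeSpec(channels=256, stride=8)}
scales = tuple(1.0 / shapes[k].stride for k in ["p2", "p3"])  # (0.25, 0.125)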
Example #9
 def output_shape(self):
     return ShapeSpec(channels=self._out_channels)
Example #10
 def __init__(self):
     super().__init__()
     self.model = KRCNNConvDeconvUpsampleHead(
         ShapeSpec(channels=4, height=14, width=14), num_keypoints=17, conv_dims=(4,)
     )
Example #11
 def __init__(self, cfg):
     super().__init__()
     self.backbone = build_resnet_fpn_backbone(cfg, ShapeSpec(channels=3))
     self.rpn = build_proposal_generator(cfg, self.backbone.output_shape())
Example #12
 roi_heads=L(StandardROIHeads)(
     num_classes=80,
     batch_size_per_image=512,
     positive_fraction=0.25,
     proposal_matcher=L(Matcher)(thresholds=[0.5],
                                 labels=[0, 1],
                                 allow_low_quality_matches=False),
     box_in_features=["p2", "p3", "p4", "p5"],
     box_pooler=L(ROIPooler)(
         output_size=7,
         scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
         sampling_ratio=0,
         pooler_type="ROIAlignV2",
     ),
     box_head=L(FastRCNNConvFCHead)(
         input_shape=ShapeSpec(channels=256, height=7, width=7),
         conv_dims=[],
         fc_dims=[1024, 1024],
     ),
     box_predictor=L(FastRCNNOutputLayers)(
         input_shape=ShapeSpec(channels=1024),
         test_score_thresh=0.05,
         box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
         num_classes="${..num_classes}",
     ),
     mask_in_features=["p2", "p3", "p4", "p5"],
     mask_pooler=L(ROIPooler)(
         output_size=14,
         scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
         sampling_ratio=0,
         pooler_type="ROIAlignV2",
Example #13
    def __init__(
        self,
        input_shape: ShapeSpec,
        *,
        box2box_transform,
        num_classes: int,
        test_score_thresh: float = 0.0,
        test_nms_thresh: float = 0.5,
        test_topk_per_image: int = 100,
        cls_agnostic_bbox_reg: bool = False,
        smooth_l1_beta: float = 0.0,
        box_reg_loss_type: str = "smooth_l1",
        loss_weight: Union[float, Dict[str, float]] = 1.0,
        use_fed_loss: bool = False,
        use_sigmoid_ce: bool = False,
        get_fed_loss_cls_weights: Optional[Callable] = None,
        fed_loss_num_classes: int = 50,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            test_score_thresh (float): threshold to filter prediction results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            cls_agnostic_bbox_reg (bool): whether to use class-agnostic bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou",
                "diou", "ciou"
            loss_weight (float|dict): weights to use for losses. Can be single float for weighting
                all losses, or a dict of individual weightings. Valid dict keys are:
                    * "loss_cls": applied to classification loss
                    * "loss_box_reg": applied to box regression loss
            use_fed_loss (bool): whether to use federated loss which samples additional negative
                classes to calculate the loss
            use_sigmoid_ce (bool): whether to calculate the loss using weighted average of binary
                cross entropy with logits. This could be used together with federated loss
            get_fed_loss_cls_weights (Callable): a callable which takes dataset name and frequency
                weight power, and returns the probabilities to sample negative classes for
                federated loss. The implementation can be found in
                detectron2/data/detection_utils.py
            fed_loss_num_classes (int): number of federated classes to keep in total
        """
        super().__init__()
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        self.num_classes = num_classes
        input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)
        # prediction layer for num_classes foreground classes and one background class (hence + 1)
        self.cls_score = nn.Linear(input_size, num_classes + 1)
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        box_dim = len(box2box_transform.weights)
        self.bbox_pred = nn.Linear(input_size, num_bbox_reg_classes * box_dim)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        for l in [self.cls_score, self.bbox_pred]:
            nn.init.constant_(l.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.box_reg_loss_type = box_reg_loss_type
        if isinstance(loss_weight, float):
            loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight}
        self.loss_weight = loss_weight
        self.use_fed_loss = use_fed_loss
        self.use_sigmoid_ce = use_sigmoid_ce
        self.fed_loss_num_classes = fed_loss_num_classes

        if self.use_fed_loss:
            assert self.use_sigmoid_ce, "Please use sigmoid cross entropy loss with federated loss"
            fed_loss_cls_weights = get_fed_loss_cls_weights()
            assert (
                len(fed_loss_cls_weights) == self.num_classes
            ), "Please check the provided fed_loss_cls_weights. Their size should match num_classes"
            self.register_buffer("fed_loss_cls_weights", fed_loss_cls_weights)
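A minimal construction sketch against this interface (the 1024-d input and 80 classes are illustrative):

import torch
from detectron2.layers import ShapeSpec
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers

predictor = FastRCNNOutputLayers(
    ShapeSpec(channels=1024),
    box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)),
    num_classes=80,
)
scores, proposal_deltas = predictor(torch.randn(8, 1024))  # shapes (8, 81), (8, 320)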
Example #14
 def output_shape(self):
     """
     Returns:
         ShapeSpec: the output feature shape
     """
     return ShapeSpec(channels=self.out_channels, height=1, width=1)
Example #15
    def __init__(
        self,
        input_shape: ShapeSpec,
        *,
        box2box_transform,
        clustering_items_per_class,
        clustering_start_iter,
        clustering_update_mu_iter,
        clustering_momentum,
        clustering_z_dimension,
        enable_clustering,
        prev_intro_cls,
        curr_intro_cls,
        max_iterations,
        output_dir,
        feat_store_path,
        margin,
        num_classes: int,
        test_score_thresh: float = 0.0,
        test_nms_thresh: float = 0.5,
        test_topk_per_image: int = 100,
        cls_agnostic_bbox_reg: bool = False,
        smooth_l1_beta: float = 0.0,
        box_reg_loss_type: str = "smooth_l1",
        loss_weight: Union[float, Dict[str, float]] = 1.0,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            test_score_thresh (float): threshold to filter prediction results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            cls_agnostic_bbox_reg (bool): whether to use class-agnostic bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
            loss_weight (float|dict): weights to use for losses. Can be single float for weighting
                all losses, or a dict of individual weightings. Valid dict keys are:
                    * "loss_cls": applied to classification loss
                    * "loss_box_reg": applied to box regression loss
        """
        super().__init__()
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        input_size = input_shape.channels * (input_shape.width
                                             or 1) * (input_shape.height or 1)
        # prediction layer for num_classes foreground classes and one background class (hence + 1)
        self.cls_score = Linear(input_size, num_classes + 1)
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        box_dim = len(box2box_transform.weights)
        self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        for l in [self.cls_score, self.bbox_pred]:
            nn.init.constant_(l.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.box_reg_loss_type = box_reg_loss_type
        if isinstance(loss_weight, float):
            loss_weight = {
                "loss_cls": loss_weight,
                "loss_box_reg": loss_weight
            }
        self.loss_weight = loss_weight

        self.num_classes = num_classes
        self.clustering_start_iter = clustering_start_iter
        self.clustering_update_mu_iter = clustering_update_mu_iter
        self.clustering_momentum = clustering_momentum

        self.hingeloss = nn.HingeEmbeddingLoss(2)
        self.enable_clustering = enable_clustering

        self.prev_intro_cls = prev_intro_cls
        self.curr_intro_cls = curr_intro_cls
        self.seen_classes = self.prev_intro_cls + self.curr_intro_cls
        self.invalid_class_range = list(
            range(self.seen_classes, self.num_classes - 1))
        logging.getLogger(__name__).info("Invalid class range: " +
                                         str(self.invalid_class_range))

        self.max_iterations = max_iterations
        self.feature_store_is_stored = False
        self.output_dir = output_dir
        self.feat_store_path = feat_store_path
        self.feature_store_save_loc = os.path.join(self.output_dir,
                                                   self.feat_store_path,
                                                   'feat.pt')

        if os.path.isfile(self.feature_store_save_loc):
            logging.getLogger(
                __name__).info('Trying to load feature store from ' +
                               self.feature_store_save_loc)
            self.feature_store = torch.load(self.feature_store_save_loc)
        else:
            logging.getLogger(__name__).info('Feature store not found in ' +
                                             self.feature_store_save_loc +
                                             '. Creating new feature store.')
            self.feature_store = Store(num_classes + 1,
                                       clustering_items_per_class)
        self.means = [None for _ in range(num_classes + 1)]
        self.margin = margin
Example #16
import torch

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.layers import ShapeSpec
from detectron2.modeling.backbone import build_resnet_backbone

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.85

net = build_resnet_backbone(cfg, ShapeSpec(channels=3))
temp = torch.load("weight.pt")
net.load_state_dict({k: temp[k] for k in net.state_dict()})
net.eval()

with torch.no_grad():
    torch.save(net(torch.load("data.pt")), "res1.pt")
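The saved result is the backbone's feature dict keyed by stage name; a quick inspection sketch (file names as above):

res = torch.load("res1.pt")
for name, feat in res.items():
    print(name, tuple(feat.shape))  # e.g. "res4", (N, 1024, H/16, W/16)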
Example #17
    def _init_box_head(cls, cfg, input_shape):
        # fmt: off
        in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features)
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
        # fmt: on

        unseen_path = cfg.DATASETS.UNSEEN_LABEL_SET
        meta = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
        if unseen_path != '':
            meta_info = {e: i for i, e in enumerate(meta.thing_classes)}
            with open(unseen_path, 'r') as f:
                lines = [meta_info[e.replace('\n', '')] for e in f.readlines()]
            unseen_label_set = sorted(lines)
            meta.stuff_classes.append('unknown')
            meta.stuff_colors.append([20, 220, 60])
            meta.stuff_dataset_id_to_contiguous_id[201] = 54
            if cfg.MODEL.EOPSN.IGNORE_UNLABELED_REGION or not cfg.MODEL.EOPSN.UNLABELED_REGION:
                label_converter = torch.ones(len(meta.thing_classes) + 1)
            else:
                label_converter = torch.ones(len(meta.thing_classes) + 2)
            for i in unseen_label_set:
                label_converter[i] = 0
            reverse_label_converter = label_converter.nonzero()[:, 0].long()
            label_converter = torch.cumsum(label_converter, 0).long() - 1
            if cfg.MODEL.EOPSN.UNLABELED_REGION:
                if cfg.MODEL.EOPSN.IGNORE_UNLABELED_REGION:
                    reverse_label_converter[-1] = -1
                else:
                    reverse_label_converter[-1] = reverse_label_converter[-2]
                    reverse_label_converter[-2] = -1
        else:
            reverse_label_converter = None
            label_converter = None

        # If StandardROIHeads is applied on multiple feature maps (as in FPN),
        # then we share the same predictors and therefore the channel counts must be the same
        in_channels = [input_shape[f].channels for f in in_features]
        # Check all channel counts are equal
        assert len(set(in_channels)) == 1, in_channels
        in_channels = in_channels[0]

        box_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )
        # Here we split "box head" and "box predictor", which is mainly due to historical reasons.
        # They are used together so the "box predictor" layers should be part of the "box head".
        # New subclasses of ROIHeads do not need "box predictor"s.
        box_head = build_box_head(
            cfg,
            ShapeSpec(channels=in_channels,
                      height=pooler_resolution,
                      width=pooler_resolution))
        if cfg.MODEL.EOPSN.PREDICTOR == 'baseline':
            box_predictor = FastRCNNOutputLayers_baseline(
                cfg, box_head.output_shape, label_converter,
                reverse_label_converter)
        elif cfg.MODEL.EOPSN.PREDICTOR == 'eopsn':
            from .eopsn_predictor import FastRCNNOutputLayers_eopsn
            box_predictor = FastRCNNOutputLayers_eopsn(
                cfg, box_head.output_shape, label_converter,
                reverse_label_converter)

        return {
            "box_in_features": in_features,
            "box_pooler": box_pooler,
            "box_head": box_head,
            "box_predictor": box_predictor,
        }
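A tiny numeric demo of the converter arithmetic above (hypothetical 5-class set with classes 1 and 3 unseen):

import torch

label_converter = torch.ones(5)
for i in (1, 3):  # unseen_label_set
    label_converter[i] = 0
reverse = label_converter.nonzero()[:, 0].long()               # tensor([0, 2, 4])
label_converter = torch.cumsum(label_converter, 0).long() - 1  # tensor([0, 0, 1, 1, 2])
# each seen class id maps to a contiguous id; reverse maps contiguous ids back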
Example #18
    def __init__(self,
                 input_shape,
                 *,
                 box2box_transform,
                 num_classes,
                 test_score_thresh=0.0,
                 test_nms_thresh=0.5,
                 test_topk_per_image=100,
                 cls_agnostic_bbox_reg=False,
                 smooth_l1_beta=0.0,
                 box_reg_loss_type="smooth_l1",
                 loss_weight=1.0,
                 oicr_iter=3,
                 fg_threshold=0.5,
                 bg_threshold=0.1,
                 freeze_layers=[],
                 embedding_path='',
                 terms={},
                 mode='Pre_Softmax',
                 mil_multiplier=4.0,
                 detector_temp=1.0,
                 classifier_temp=1.0):
        super(FastRCNNOutputsBase,
              self).__init__(input_shape=input_shape,
                             box2box_transform=box2box_transform,
                             num_classes=num_classes,
                             test_score_thresh=test_score_thresh,
                             test_nms_thresh=test_nms_thresh,
                             test_topk_per_image=test_topk_per_image,
                             cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
                             smooth_l1_beta=smooth_l1_beta,
                             box_reg_loss_type=box_reg_loss_type,
                             loss_weight=loss_weight)
        self.num_classes = num_classes
        self.oicr_iter = oicr_iter
        self.fg_threshold = fg_threshold
        self.bg_threshold = bg_threshold
        self.terms = terms
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        box_dim = len(box2box_transform.weights)
        self.box_dim = box_dim
        self.num_bbox_reg_classes = num_bbox_reg_classes
        self.mode = mode
        self.mil_multiplier = mil_multiplier
        self.detector_temp = detector_temp
        self.classifier_temp = classifier_temp
        # Delete instances defined by super
        del self.cls_score
        del self.bbox_pred

        # Define delta predictors
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        input_size = input_shape.channels * (input_shape.width
                                             or 1) * (input_shape.height or 1)
        self.input_size = input_size
        self.classifier_stream = Linear(input_size, self.num_classes)
        self.detection_stream = Linear(input_size, self.num_classes)
        self.oicr_predictors = nn.ModuleList([
            Linear(input_size, self.num_classes + 1)
            for _ in range(self.oicr_iter)
        ])
        self.cls_score_delta = Linear(input_size, self.num_classes + 1)
        self.bbox_pred_delta = Linear(input_size,
                                      num_bbox_reg_classes * box_dim)

        # Init Predictors
        nn.init.normal_(self.bbox_pred_delta.weight, std=0.001)
        nn.init.normal_(self.classifier_stream.weight, std=0.01)
        nn.init.normal_(self.detection_stream.weight, std=0.01)
        for idx in range(self.oicr_iter):
            nn.init.normal_(self.oicr_predictors[idx].weight, std=0.01)
            nn.init.constant_(self.oicr_predictors[idx].bias, 0.)
        nn.init.constant_(self.cls_score_delta.weight, 0.)
        # nn.init.constant_(self.bbox_pred_delta.weight, 0.)
        for l in [
                self.cls_score_delta, self.bbox_pred_delta,
                self.detection_stream, self.classifier_stream
        ]:
            nn.init.constant_(l.bias, 0.)

        pretrained_embeddings = torch.load(embedding_path)['embeddings']
        self.embeddings = nn.Embedding.from_pretrained(pretrained_embeddings,
                                                       freeze=True)
        self._freeze_layers(layers=freeze_layers)
Example #19
    def init_model(self):
        assert self.backbone in [
            "resnet18", "resnet50", "shufflenet_v2_x1_0", "resnet50_detectron"
        ]
        detectron_resnet_layer4 = None
        if self.backbone == "resnet18":
            backbone = resnet18
            backbone_network = backbone(first_conv=self.first_conv,
                                        maxpool1=self.maxpool1,
                                        return_all_feature_maps=False)
            self.feature_dim = backbone_network.fc.in_features

        elif self.backbone == "resnet50":
            backbone = resnet50
            backbone_network = backbone(first_conv=self.first_conv,
                                        maxpool1=self.maxpool1,
                                        return_all_feature_maps=False)
            self.feature_dim = backbone_network.fc.in_features
        elif self.backbone == "shufflenet_v2_x1_0":
            backbone = shufflenet_v2_x1_0
            backbone_network = backbone()
            self.feature_dim = backbone_network.fc.in_features
            backbone_network.fc = Identity()
        elif self.backbone == "resnet50_detectron":
            with open("examples/local/detectron_resnet50_c4_config.yaml",
                      "r") as f:
                import yaml
                cfg = yaml.load(f, Loader=yaml.Loader)
            from detectron2.modeling.backbone.resnet import build_resnet_backbone
            from detectron2.layers import ShapeSpec
            input_shape = ShapeSpec(channels=3)  # 3-channel RGB input
            backbone_network = build_resnet_backbone(cfg, input_shape)
            backbone_network = unfreeze_batchnorm_layers(backbone_network)
            detectron_resnet_layer4 = Resnet50Layer4()
            self.feature_dim = 2048
        else:
            raise ValueError(f"Unsupported backbone: {self.backbone}")

        if self.coordconv is not None:
            from thelper.nn.coordconv import swap_coordconv_layers  # lazy import
        if self.coordconv == "all":
            backbone_network = swap_coordconv_layers(backbone_network)
        if self.coordconv == "first":
            backbone_network.conv1 = swap_coordconv_layers(
                backbone_network.conv1)

        self.cyclic_predictor = None
        if self.loss_function == "cyclic":
            #Use 2 stacked inputs for the predictor
            self.cyclic_predictor = PredictionMLP(self.feature_dim * 2,
                                                  self.hidden_mlp,
                                                  self.feature_dim)
        #else:
        #All other methods work on pairs!
        self.online_network = SiameseArm(
            backbone_network,
            input_dim=self.feature_dim,
            hidden_size=self.hidden_mlp,
            output_dim=self.feat_dim,
            detectron_resnet_layer4=detectron_resnet_layer4)
        encoder, projector = self.online_network.encoder, self.online_network.projector

        self.train_features = torch.zeros((self.num_samples, self.feature_dim))
        self.train_meta = []
        self.train_targets = -torch.ones((self.num_samples))
        self.valid_features = torch.zeros(
            (self.num_samples_valid, self.feature_dim))
        self.valid_meta = []
        self.cuda_train_features = None
Example #20
    def __init__(self,
                 input_shape,
                 *,
                 box2box_transform,
                 num_classes,
                 test_score_thresh=0.0,
                 test_nms_thresh=0.5,
                 test_topk_per_image=100,
                 cls_agnostic_bbox_reg=False,
                 smooth_l1_beta=0.0,
                 box_reg_loss_type="smooth_l1",
                 loss_weight=1.0,
                 weak_detector_head=None,
                 regression_branch=False,
                 terms={},
                 freeze_layers=[],
                 embedding_path=''):
        super(SupervisedDetectorOutputsBase,
              self).__init__(input_shape=input_shape,
                             box2box_transform=box2box_transform,
                             num_classes=num_classes,
                             test_score_thresh=test_score_thresh,
                             test_nms_thresh=test_nms_thresh,
                             test_topk_per_image=test_topk_per_image,
                             cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
                             smooth_l1_beta=smooth_l1_beta,
                             box_reg_loss_type=box_reg_loss_type,
                             loss_weight=loss_weight)
        self.num_classes = num_classes
        self.terms = terms
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        box_dim = len(box2box_transform.weights)
        self.box_dim = box_dim
        self.num_bbox_reg_classes = num_bbox_reg_classes
        self.weak_detector_head = weak_detector_head
        self.regression_branch = regression_branch

        # Delete instances defined by super
        del self.cls_score
        del self.bbox_pred

        # Define delta predictors
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        input_size = input_shape.channels * (input_shape.width
                                             or 1) * (input_shape.height or 1)
        self.input_size = input_size
        self.cls_score_delta = Linear(input_size, self.num_classes + 1)
        self.bbox_pred_delta = Linear(input_size,
                                      num_bbox_reg_classes * box_dim)

        # Init Predictors
        nn.init.constant_(self.cls_score_delta.weight, 0.)
        if not self.regression_branch:
            nn.init.normal_(self.bbox_pred_delta.weight, std=0.001)
        else:
            nn.init.constant_(self.bbox_pred_delta.weight, 0.)
        for l in [self.cls_score_delta, self.bbox_pred_delta]:
            nn.init.constant_(l.bias, 0.)

        pretrained_embeddings = torch.load(embedding_path)['embeddings']
        self.embeddings = nn.Embedding.from_pretrained(pretrained_embeddings,
                                                       freeze=True)
        self._freeze_layers(layers=freeze_layers)
Example #21
 def output_shape(self):
     return {
         "res5":
         ShapeSpec(channels=1024,
                   stride=16 if self.res5_dilation == 2 else 32)
     }
Example #22
    def _init_box_head(self, cfg, input_shape):
        # fmt: off
        pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        pooler_scales = tuple(1.0 / input_shape[k].stride
                              for k in self.in_features)
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
        st_pooler_type = cfg.MODEL.SPATIOTEMPORAL.ST_POOLER_TYPE
        self.train_on_pred_boxes = cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES
        self.st_cls = cfg.MODEL.SPATIOTEMPORAL.ST_CLS
        self.spatial_cls = cfg.MODEL.SPATIOTEMPORAL.SPATIAL_CLS
        self.longterm_proposals = cfg.MODEL.SPATIOTEMPORAL.ROI_BOX_HEAD.REF_POST_NMS_TOP_N
        self.st_box_head_name = cfg.MODEL.SPATIOTEMPORAL.ROI_BOX_HEAD.NAME
        self.long_term = cfg.MODEL.SPATIOTEMPORAL.LONG_TERM
        self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
        # fmt: on
        self.st_cls_short_term_aggregation = cfg.MODEL.SPATIOTEMPORAL.ST_CLS_SHORT_TERM_AGGREGATION
        self.proposal_tracking = cfg.MODEL.SPATIOTEMPORAL.PROPOSAL_TRACKING
        self.test_tracking_type = cfg.MODEL.SPATIOTEMPORAL.TEST_TRACKING_TYPE

        # If StandardROIHeads is applied on multiple feature maps (as in FPN),
        # then we share the same predictors and therefore the channel counts must be the same
        in_channels = [input_shape[f].channels for f in self.in_features]
        # Check all channel counts are equal
        assert len(set(in_channels)) == 1, in_channels
        in_channels = in_channels[0]

        self.long_term_proposal_matcher = Matcher(
            [0.3],  # TODO: config(cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS)
            [0, 1],  # TODO: config(cfg.MODEL.ROI_HEADS.IOU_LABELS)
            allow_low_quality_matches=False,
        )

        self.box_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )

        self.st_box_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=st_pooler_type,
        )

        # Here we split "box head" and "box predictor", which is mainly due to historical reasons.
        # They are used together so the "box predictor" layers should be part of the "box head".
        # New subclasses of ROIHeads do not need "box predictor"s.
        if self.st_cls:
            self.st_box_head = build_st_box_head(
                cfg,
                ShapeSpec(channels=in_channels,
                          height=pooler_resolution,
                          width=pooler_resolution))
            self.st_cls_predictor = StClassificationOutputLayers(
                self.st_box_head.output_size, self.num_classes)
        self.box_head = build_box_head(
            cfg,
            ShapeSpec(channels=in_channels,
                      height=pooler_resolution,
                      width=pooler_resolution))
        self.box_predictor = FastRCNNOutputLayers(self.box_head.output_size,
                                                  self.num_classes,
                                                  self.cls_agnostic_bbox_reg)

        if cfg.MODEL.SPATIOTEMPORAL.FREEZE_SPATIAL_HEAD:
            self.freeze_component(self.box_head)
            self.freeze_component(self.box_predictor)
Example #23
    def __init__(
        self,
        input_shape,
        *,
        box2box_transform,
        num_classes,
        num_attr_classes,
        max_attr_pred,
        attr_cls_mode,
        attr_cls_agnostic,
        ignore_nan_attr_class,
        test_attr_score_thresh=0.5,
        cls_agnostic_bbox_reg=False,
        smooth_l1_beta=0.0,
        test_score_thresh=0.0,
        test_nms_thresh=0.5,
        test_topk_per_image=100,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            num_attr_classes (int): number of attribute classes
            cls_agnostic_bbox_reg (bool): whether to use class-agnostic bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss.
            test_score_thresh (float): threshold to filter prediction results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
        """
        super().__init__()
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        input_size = input_shape.channels * (input_shape.width
                                             or 1) * (input_shape.height or 1)
        # The prediction layer for num_classes foreground classes and one background class
        # (hence + 1)
        self.cls_score = Linear(input_size, num_classes + 1)
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        box_dim = len(box2box_transform.weights)
        self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)
        #print("Is class agnostic: ", attr_cls_agnostic)
        if attr_cls_agnostic:
            num_attr_reg_classes = 1
        else:
            num_attr_reg_classes = num_classes
        if attr_cls_mode == 0:
            self.attr_cls_score = Linear(
                input_size, num_attr_reg_classes * num_attr_classes)
            nn.init.normal_(self.attr_cls_score.weight, std=0.01)
            nn.init.constant_(self.attr_cls_score.bias, 0)
        elif attr_cls_mode == 1:
            self.attr_cls_score_1 = Linear(input_size, 1024)
            self.attr_cls_score_2 = Linear(
                1024, num_attr_reg_classes * num_attr_classes)
            nn.init.normal_(self.attr_cls_score_1.weight, std=0.01)
            nn.init.constant_(self.attr_cls_score_1.bias, 0)
            nn.init.normal_(self.attr_cls_score_2.weight, std=0.01)
            nn.init.constant_(self.attr_cls_score_2.bias, 0)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        for l in [self.cls_score, self.bbox_pred]:
            nn.init.constant_(l.bias, 0)
        self.num_attr_classes = num_attr_classes  # e.g. 295
        self.max_attr_pred = max_attr_pred
        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.test_attr_score_thresh = test_attr_score_thresh
        self.attr_cls_mode = attr_cls_mode
        self.attr_cls_agnostic = attr_cls_agnostic
        self.ignore_nan_attr_class = ignore_nan_attr_class
Example #24
0
    def test_StandardROIHeads_scriptability(self):
        cfg = get_cfg()
        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
        cfg.MODEL.MASK_ON = True
        cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.01
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01
        num_images = 2
        images_tensor = torch.rand(num_images, 20, 30)
        image_sizes = [(10, 10), (20, 30)]
        images = ImageList(images_tensor, image_sizes)
        num_channels = 1024
        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
        feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

        roi_heads = StandardROIHeads(cfg, feature_shape).eval()

        proposal0 = Instances(image_sizes[0])
        proposal_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]],
                                       dtype=torch.float32)
        proposal0.proposal_boxes = Boxes(proposal_boxes0)
        proposal0.objectness_logits = torch.tensor([0.5, 0.7],
                                                   dtype=torch.float32)

        proposal1 = Instances(image_sizes[1])
        proposal_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]],
                                       dtype=torch.float32)
        proposal1.proposal_boxes = Boxes(proposal_boxes1)
        proposal1.objectness_logits = torch.tensor([0.1, 0.9],
                                                   dtype=torch.float32)
        proposals = [proposal0, proposal1]

        pred_instances, _ = roi_heads(images, features, proposals)
        fields = {
            "objectness_logits": "Tensor",
            "proposal_boxes": "Boxes",
            "pred_classes": "Tensor",
            "scores": "Tensor",
            "pred_masks": "Tensor",
            "pred_boxes": "Boxes",
            "pred_keypoints": "Tensor",
            "pred_keypoint_heatmaps": "Tensor",
        }
        with patch_instances(fields) as new_instances:
            proposal0 = new_instances.from_instances(proposal0)
            proposal1 = new_instances.from_instances(proposal1)
            proposals = [proposal0, proposal1]
            scripted_roi_heads = torch.jit.script(roi_heads)
            scripted_pred_instances, _ = scripted_roi_heads(
                images, features, proposals)

        for instance, scripted_instance in zip(pred_instances,
                                               scripted_pred_instances):
            self.assertEqual(instance.image_size, scripted_instance.image_size)
            self.assertTrue(
                torch.equal(instance.pred_boxes.tensor,
                            scripted_instance.pred_boxes.tensor))
            self.assertTrue(
                torch.equal(instance.scores, scripted_instance.scores))
            self.assertTrue(
                torch.equal(instance.pred_classes,
                            scripted_instance.pred_classes))
            self.assertTrue(
                torch.equal(instance.pred_masks, scripted_instance.pred_masks))
Example #25
    def __init__(self,
                 input_shape,
                 *,
                 box2box_transform,
                 num_classes,
                 test_score_thresh=0.0,
                 test_nms_thresh=0.5,
                 test_topk_per_image=100,
                 cls_agnostic_bbox_reg=False,
                 smooth_l1_beta=0.0,
                 box_reg_loss_type="smooth_l1",
                 box_reg_loss_weight=1.0,
                 add_unlabeled_class=False,
                 label_converter=None,
                 reverse_label_converter=None,
                 num_centroid=256,
                 clustering_interval=1000,
                 cluster_obj_thresh=0.8,
                 coupled_cos_thresh=0.15,
                 coupled_obj_thresh=0.9,
                 cos_thresh=0.15,
                 pos_class_thresh=0.7,
                 nms_thresh=0.3,
                 n_sample=20,
                 output_dir='./'):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            test_score_thresh (float): threshold to filter prediction results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            cls_agnostic_bbox_reg (bool): whether to use class-agnostic bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
            box_reg_loss_weight (float): Weight for box regression loss
        """
        super().__init__()
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        input_size = input_shape.channels * (input_shape.width
                                             or 1) * (input_shape.height or 1)
        # The prediction layer for num_classes foreground classes and one background class
        # (hence + 1)
        self.label_converter = label_converter
        self.reverse_label_converter = reverse_label_converter
        self.original_num_classes = len(self.label_converter)
        addition = self.label_converter.max() + torch.arange(num_centroid) + 1
        self.label_converter = torch.cat((self.label_converter, addition))

        if self.reverse_label_converter is not None:
            num_classes = min(num_classes + 1, len(reverse_label_converter))
        num_cls = num_classes

        self.add_unlabeled_class = add_unlabeled_class
        self.num_classes = num_cls

        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_cls - 1
        box_dim = len(box2box_transform.weights)
        self.cls_score = Linear(input_size, num_cls + num_centroid)
        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.constant_(self.cls_score.bias, 0)

        self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        nn.init.constant_(self.bbox_pred.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.box_reg_loss_type = box_reg_loss_type
        self.box_reg_loss_weight = box_reg_loss_weight

        self.feature_memory = []
        self.label_memory = []
        self.obj_score_memory = []
        self.path_memory = []
        self.bbox_memory = []

        self.num_centroid = num_centroid
        self.clustering_interval = clustering_interval
        # one weight per output slot: 1 for the known classes, 0 for the centroid slots
        weight = torch.zeros((num_centroid + num_cls, 1))
        weight[:num_cls] = 1
        self.cls_weight = nn.Embedding(num_centroid + num_cls,
                                       1).from_pretrained(weight, freeze=True)
        self.turn_on = False
        self.step = 1
        self.cluster_count = 1
        self.pseudo_gt = None
        self.n_pseudo_gt = 0

        self.n_sample = n_sample
        self.cluster_obj_thresh = cluster_obj_thresh
        self.cos_thresh = cos_thresh
        self.coupled_cos_thresh = coupled_cos_thresh
        self.coupled_obj_thresh = coupled_obj_thresh
        self.pos_class_thresh = pos_class_thresh
        self.nms_thresh = nms_thresh
        self.pal = np.random.random((1024, 3)) * 255

        self.size_opt = 'lm'

        self.output_dir = output_dir

        g_list = glob.glob(os.path.join(self.output_dir, 'pseudo_gts',
                                        '*.pth'))
        if len(g_list) > 0:
            g_list = [
                int(x.split('/')[-1].replace('.pth', '')) for x in g_list
            ]
            g = max(g_list)
            path = os.path.join(self.output_dir, 'pseudo_gts/{}.pth').format(g)
            self.pseudo_gt = torch.load(path)
            self.n_pseudo_gt = len(self.pseudo_gt)
            self.step = g + 1
            if self.pseudo_gt is not None and len(self.pseudo_gt) > 0:

                label = int(self.pseudo_gt[:, 1].max())
                weight[:label] = 1
                self.cls_weight = nn.Embedding(num_centroid + num_cls,
                                               1).from_pretrained(weight,
                                                                  freeze=True)
Example #26
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)
        self.backbone_2 = None
        if cfg.INPUT.NUM_IN_CHANNELS != 3:
            if cfg.INPUT.FORMAT == 'BGRTTT' or cfg.INPUT.FORMAT == 'BGRTTT_perturb':
                # Middle fusion: a second 3-channel backbone for the thermal stream
                input_shape = ShapeSpec(channels=3)
                self.backbone_2 = build_backbone(cfg, input_shape)
            else:
                # Early fusion: a single backbone over all input channels
                input_shape = ShapeSpec(channels=cfg.INPUT.NUM_IN_CHANNELS)
            self.backbone = build_backbone(cfg, input_shape)
            num_channels = cfg.INPUT.NUM_IN_CHANNELS
            print(num_channels, ' channel input')
        else:  # RGB or thermal only
            print('3 channel input')
            self.backbone = build_backbone(cfg)
            num_channels = len(cfg.MODEL.PIXEL_MEAN)

        if cfg.INPUT.FORMAT == 'BGRTTT' or cfg.INPUT.FORMAT == 'BGRTTT_perturb':
            # the two streams are fused, doubling each feature map's channel count
            output_shape = {
                key: ShapeSpec(channels=spec.channels * 2, stride=spec.stride)
                for key, spec in self.backbone.output_shape().items()
            }
            self.proposal_generator = build_proposal_generator(
                cfg, output_shape)
            self.roi_heads = build_roi_heads(cfg, output_shape)
            del output_shape
            pixel_mean_RGB = torch.Tensor(cfg.MODEL.PIXEL_MEAN[:3]).to(
                self.device).view(3, 1, 1)
            pixel_mean_thermal = torch.Tensor(cfg.MODEL.PIXEL_MEAN[3:]).to(
                self.device).view(3, 1, 1)
            pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD[:3]).to(
                self.device).view(3, 1, 1)

            self.normalizer = lambda x: (x - pixel_mean_RGB) / pixel_std
            self.normalizer_thermal = lambda x: (x - pixel_mean_thermal
                                                 ) / pixel_std
        else:
            self.proposal_generator = build_proposal_generator(
                cfg, self.backbone.output_shape())
            self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape())
            pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(
                self.device).view(num_channels, 1, 1)
            pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
                num_channels, 1, 1)
            self.normalizer = lambda x: (x - pixel_mean) / pixel_std

        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)

        self.to(self.device)

        self.blur_rgb = bool(cfg.MODEL.BLUR_RGB)
        self.max_pool_rgb = bool(cfg.MODEL.MAX_POOL_RGB)
Example #27
    def __init__(
        self,
        input_shape: ShapeSpec,
        *,
        box2box_transform,
        num_classes: int,
        test_score_thresh: float = 0.0,
        test_nms_thresh: float = 0.5,
        test_topk_per_image: int = 100,
        cls_agnostic_bbox_reg: bool = False,
        smooth_l1_beta: float = 0.0,
        box_reg_loss_type: str = "smooth_l1",
        loss_weight: Union[float, Dict[str, float]] = 1.0,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            test_score_thresh (float): threshold to filter prediction results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            cls_agnostic_bbox_reg (bool): whether to use class-agnostic bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
            loss_weight (float|dict): weights to use for losses. Can be single float for weighting
                all losses, or a dict of individual weightings. Valid dict keys are:
                    * "loss_cls": applied to classification loss
                    * "loss_box_reg": applied to box regression loss
        """
        super().__init__()
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        input_size = input_shape.channels * (input_shape.width
                                             or 1) * (input_shape.height or 1)
        # prediction layer for num_classes foreground classes and one background class (hence + 1)
        self.cls_score = Linear(input_size, num_classes + 1)
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        box_dim = len(box2box_transform.weights)
        self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        for l in [self.cls_score, self.bbox_pred]:
            nn.init.constant_(l.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.box_reg_loss_type = box_reg_loss_type
        if isinstance(loss_weight, float):
            loss_weight = {
                "loss_cls": loss_weight,
                "loss_box_reg": loss_weight
            }
        self.loss_weight = loss_weight
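
A short usage sketch for the predictor above, assuming this __init__ belongs to a FastRCNNOutputLayers-style class as in detectron2; the feature size and transform weights are illustrative:

import torch
from detectron2.layers import ShapeSpec
from detectron2.modeling.box_regression import Box2BoxTransform

predictor = FastRCNNOutputLayers(
    input_shape=ShapeSpec(channels=1024),
    box2box_transform=Box2BoxTransform(weights=(10.0, 10.0, 5.0, 5.0)),
    num_classes=80,
    loss_weight=0.5,  # a bare float expands to {"loss_cls": 0.5, "loss_box_reg": 0.5}
)
box_features = torch.randn(512, 1024)       # 512 pooled RoI feature vectors
scores = predictor.cls_score(box_features)  # (512, 81): 80 classes + background
deltas = predictor.bbox_pred(box_features)  # (512, 320): 80 classes x 4 box deltas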
Example #28
0
    def __init__(self,
                 cfg=None,
                 load_path=None,
                 depth=101,
                 vec_dim=128,
                 max_pool=False,
                 clf1_num=None,
                 clf2_num=None,
                 adv_eta=None):
        super(ResNetbasedNet, self).__init__()
        self.load = load_path is not None
        self.clf1 = clf1_num is not None
        self.clf2 = clf2_num is not None
        # torch.autograd.Variable is deprecated; a plain tensor behaves the same here
        self.adv_eta = torch.tensor(
            adv_eta, dtype=torch.float) if adv_eta is not None else None

        if cfg is not None:
            # Build a detectron2 ResNet backbone and load detectron2-format
            # weights, stripping the 'backbone.bottom_up.' prefix below.
            model = build_resnet_backbone(
                cfg, ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
            pretrained_model = torch.load(cfg.MODEL.WEIGHTS)
            cur_state = model.state_dict()
            mapped_dict = {}
            for name, param in pretrained_model.items():
                if name == 'model':
                    for p in param:
                        if p.replace('backbone.bottom_up.', '') in cur_state:
                            mapped_dict[p.replace('backbone.bottom_up.',
                                                  '')] = param[p]
            model.load_state_dict(mapped_dict)
            self.backbone = nn.Sequential(*list(model.children()))
        else:
            model = torch.hub.load('pytorch/vision:v0.6.0',
                                   'resnet{}'.format(depth),
                                   pretrained=not self.load)
            self.backbone = nn.Sequential(*list(model.children())[:-2])

        self.max_pool = nn.AdaptiveMaxPool2d(
            (1, 1)) if max_pool else nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(2048, vec_dim)

        if self.clf1:
            self.clf1_layer = nn.Sequential(nn.Linear(vec_dim, vec_dim),
                                            nn.BatchNorm1d(vec_dim), nn.ReLU(),
                                            nn.Linear(vec_dim, clf1_num))

        if self.clf2:
            self.clf2_layer = nn.Sequential(nn.Linear(vec_dim, vec_dim),
                                            nn.BatchNorm1d(vec_dim), nn.ReLU(),
                                            nn.Linear(vec_dim, clf2_num))

        if self.load:
            # Split the saved state dict by top-level submodule name, then load
            # each submodule's parameters separately.
            load_model = torch.load(load_path)
            mapped_dict = {
                'backbone': (self.backbone, {}),
                'fc': (self.fc, {})
            }
            if self.clf1:
                mapped_dict['clf1_layer'] = (self.clf1_layer, {})
            if self.clf2:
                mapped_dict['clf2_layer'] = (self.clf2_layer, {})
            for name, param in load_model.items():
                prefix, _, rest = name.partition('.')
                if prefix in mapped_dict:
                    mapped_dict[prefix][1][rest] = param
            for module, state in mapped_dict.values():
                module.load_state_dict(state)
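
A hedged usage sketch for the torchvision branch of ResNetbasedNet; the example shows no forward method, so the pass below chains backbone, pooling, and fc by hand:

import torch

net = ResNetbasedNet(depth=50, vec_dim=128)  # no cfg/load_path: torchvision ResNet-50
x = torch.randn(2, 3, 224, 224)
feat = net.max_pool(net.backbone(x))         # (2, 2048, 1, 1) after global pooling
vec = net.fc(feat.flatten(1))                # (2, 128) embedding vector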
Example #29
0
    def __init__(
        self,
        input_shape,
        *,
        standard_cls_bone,
        std_num_classes,
        std_cls_emb_dim,
        box2box_transform,
        num_classes,
        arc_args={},
        test_score_thresh=0.0,
        test_nms_thresh=0.5,
        test_topk_per_image=100,
        category_loss_type='cross_entropy',
        std_cls_loss_type='softmax',
        cls_agnostic_bbox_reg=False,
        smooth_l1_beta=0.0,
        box_reg_loss_type="smooth_l1",
        box_reg_loss_weight=1.0,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            standard_cls_bone (nn.Module): branch that maps pooled features to
                `std_cls_emb_dim`-dimensional embeddings for the fine-grained classifier
            std_num_classes (int): number of fine-grained (standard) classes
            std_cls_emb_dim (int): dimension of the fine-grained classification embedding
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): the
                transform between proposal boxes and box regression deltas
            num_classes (int): number of foreground classes
            arc_args (dict): ArcLayer parameters (`s`, `m`, `easy_margin`); only used
                if `std_cls_loss_type` is "arc"
            test_score_thresh (float): score threshold used to filter prediction results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            category_loss_type (str): loss type for the coarse category branch
            std_cls_loss_type (str): fine-grained classification loss. One of: "softmax", "arc"
            cls_agnostic_bbox_reg (bool): whether to use class-agnostic bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
            box_reg_loss_weight (float): weight for the box regression loss
        """
        super(MlabelStandardFastRCNNOutputLayer2, self).__init__()
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        input_size = input_shape.channels * (input_shape.width
                                             or 1) * (input_shape.height or 1)
        # The prediction layer for num_classes foreground classes and one background class
        # (hence + 1)
        # coarse category classification
        self.category_score = nn.Sequential(
            Flatten(), Linear(input_size, num_classes + 1))
        # box regression
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        box_dim = len(box2box_transform.weights)
        self.bbox_pred = nn.Sequential(
            Flatten(), Linear(input_size, num_bbox_reg_classes * box_dim))
        # fine-grained classification
        self.standard_cls_bone = standard_cls_bone
        if std_cls_loss_type == 'softmax':
            self.std_cls_score = Linear(std_cls_emb_dim, std_num_classes + 1)
            nn.init.normal_(self.std_cls_score.weight, std=0.01)
            nn.init.constant_(self.std_cls_score.bias, 0)
        elif std_cls_loss_type == 'arc':
            self.std_cls_score = ArcLayer(std_cls_emb_dim,
                                          std_num_classes + 1,
                                          s=arc_args['s'],
                                          m=arc_args['m'],
                                          easy_margin=arc_args['easy_margin'])
        else:
            raise NotImplementedError(
                'Only "softmax" and "arc" modes are currently supported; got {}'.format(
                    std_cls_loss_type))
        for pairs in [
                self.standard_cls_bone.named_parameters(),
                self.category_score.named_parameters(),
                self.bbox_pred.named_parameters()
        ]:
            for name, params in pairs:
                if 'weight' in name:
                    nn.init.normal_(params, std=0.01)
                elif 'bias' in name:
                    nn.init.constant_(params, 0.)
        self.std_cls_loss_type = std_cls_loss_type
        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.box_reg_loss_type = box_reg_loss_type
        self.box_reg_loss_weight = box_reg_loss_weight
        self.category_loss_type = category_loss_type
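
A minimal construction sketch for the multi-branch head above; the standard_cls_bone below is a stand-in module, since ArcLayer and the real bone come from the surrounding project:

import torch
from torch import nn
from detectron2.layers import ShapeSpec
from detectron2.modeling.box_regression import Box2BoxTransform

head = MlabelStandardFastRCNNOutputLayer2(
    input_shape=ShapeSpec(channels=256, width=7, height=7),
    standard_cls_bone=nn.Sequential(nn.Flatten(),
                                    nn.Linear(256 * 7 * 7, 128)),  # stand-in bone
    std_num_classes=1000,
    std_cls_emb_dim=128,
    box2box_transform=Box2BoxTransform(weights=(10.0, 10.0, 5.0, 5.0)),
    num_classes=10,
    std_cls_loss_type='softmax',
)
x = torch.randn(4, 256, 7, 7)             # pooled RoI features
category_logits = head.category_score(x)  # (4, 11): 10 categories + background
deltas = head.bbox_pred(x)                # (4, 40): 10 classes x 4 box deltas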
Example #30
0
    def output_shape(self):
        return {
            f"stride{s}": ShapeSpec(channels=self._out_feature_channels[k], stride=s)
            for k, s in self._out_feature_strides.items()
        }
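
For a typical bottom-up backbone this keys each feature map by its stride; the channel counts below are illustrative:

shapes = backbone.output_shape()
# e.g. {"stride4":  ShapeSpec(channels=256, stride=4),
#       "stride8":  ShapeSpec(channels=512, stride=8),
#       "stride16": ShapeSpec(channels=1024, stride=16),
#       "stride32": ShapeSpec(channels=2048, stride=32)}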