def test_default_anchor_generator_centered(self):
    """DefaultAnchorGenerator with ``OFFSET = 0.5`` produces the expected anchors.

    One feature map with spatial size 1x2 at stride 4 is used, together with two
    anchor sizes and three aspect ratios, so six anchors are expected at each of
    the two locations.
    """
    stride = 4
    batch_size = 2

    cfg = get_cfg()
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]]
    cfg.MODEL.ANCHOR_GENERATOR.OFFSET = 0.5
    generator = DefaultAnchorGenerator(cfg, [ShapeSpec(stride=stride)])

    # Only the last two dimensions of the feature tensor matter here.
    features = {"stage3": torch.rand(batch_size, 96, 1, 2)}
    anchors = generator([features["stage3"]])

    # Anchors expected at the first location of the feature map.
    first_cell = torch.tensor(
        [
            [-30.0, -6.0, 34.0, 10.0],
            [-14.0, -14.0, 18.0, 18.0],
            [-6.0, -30.0, 10.0, 34.0],
            [-62.0, -14.0, 66.0, 18.0],
            [-30.0, -30.0, 34.0, 34.0],
            [-14.0, -62.0, 18.0, 66.0],
        ]
    )
    # The second location holds the same anchors moved by one stride along the
    # first and third columns (e.g. -26.0 == -30.0 + 4).
    x_shift = torch.tensor([float(stride), 0.0, float(stride), 0.0])
    expected = torch.cat([first_cell, first_cell + x_shift])

    for image_idx in range(batch_size):
        assert torch.allclose(anchors[image_idx][0].tensor, expected)
def output_shape(self):
    """Describe every output feature of this object as a ShapeSpec.

    Returns:
        dict[str->ShapeSpec]: one entry per name in ``self._out_features``,
        carrying the channel count from ``self._out_feature_channels`` and the
        stride from ``self._out_feature_strides``.
    """
    shapes = {}
    for feature_name in self._out_features:
        shapes[feature_name] = ShapeSpec(
            channels=self._out_feature_channels[feature_name],
            stride=self._out_feature_strides[feature_name],
        )
    return shapes
def test_default_anchor_generator(self):
    """DefaultAnchorGenerator produces the expected anchors with the default offset.

    One feature map with spatial size 1x2 at stride 4 is used, together with two
    anchor sizes and three aspect ratios, so six anchors are expected at each of
    the two locations.
    """
    stride = 4
    batch_size = 2

    cfg = get_cfg()
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]]
    generator = DefaultAnchorGenerator(cfg, [ShapeSpec(stride=stride)])

    # Only the last two dimensions of the feature tensor matter here.
    features = {"stage3": torch.rand(batch_size, 96, 1, 2)}
    anchors = generator([features["stage3"]])

    # Anchors expected at the first location of the feature map.
    first_cell = torch.tensor(
        [
            [-32.0, -8.0, 32.0, 8.0],
            [-16.0, -16.0, 16.0, 16.0],
            [-8.0, -32.0, 8.0, 32.0],
            [-64.0, -16.0, 64.0, 16.0],
            [-32.0, -32.0, 32.0, 32.0],
            [-16.0, -64.0, 16.0, 64.0],
        ]
    )
    # The second location holds the same anchors moved by one stride along the
    # first and third columns: -28.0 == -32.0 + STRIDE (4).
    x_shift = torch.tensor([float(stride), 0.0, float(stride), 0.0])
    expected = torch.cat([first_cell, first_cell + x_shift])

    for image_idx in range(batch_size):
        assert torch.allclose(anchors[image_idx][0].tensor, expected)
def _init_keypoint_head(self, cfg, input_shape):
    """Create the keypoint pooler and keypoint head when keypoints are enabled.

    Always stores ``cfg.MODEL.KEYPOINT_ON`` in ``self.keypoint_on``. When that
    flag is falsy nothing else is created.

    Args:
        cfg: config object; ``MODEL.KEYPOINT_ON`` and the
            ``MODEL.ROI_KEYPOINT_HEAD.POOLER_*`` entries are read.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``; indexed with ``self.in_features``.
    """
    self.keypoint_on = cfg.MODEL.KEYPOINT_ON
    if not self.keypoint_on:
        return

    head_cfg = cfg.MODEL.ROI_KEYPOINT_HEAD
    resolution = head_cfg.POOLER_RESOLUTION
    # One scale per input feature: the reciprocal of that feature's stride.
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = head_cfg.POOLER_SAMPLING_RATIO
    kind = head_cfg.POOLER_TYPE

    # The channel count of the first input feature is the one handed to the head.
    channels_per_feature = [input_shape[name].channels for name in self.in_features]
    in_channels = channels_per_feature[0]

    self.keypoint_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=in_channels, width=resolution, height=resolution)
    self.keypoint_head = build_keypoint_head(cfg, pooled_shape)
def _init_box_head(self, cfg, input_shape):
    """Create the box pooler, the box head and the box predictor.

    Also stores ``cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES`` in
    ``self.train_on_pred_boxes``.

    Args:
        cfg: config object; the ``MODEL.ROI_BOX_HEAD`` entries are read.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``; indexed with ``self.in_features``.
    """
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    # One scale per input feature: the reciprocal of that feature's stride.
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    kind = box_cfg.POOLER_TYPE
    self.train_on_pred_boxes = box_cfg.TRAIN_ON_PRED_BOXES

    # When the heads run on several feature maps (as in FPN) the same predictors
    # are shared between them, so every map must have the same channel count.
    channels_per_feature = [input_shape[name].channels for name in self.in_features]
    assert len(set(channels_per_feature)) == 1, channels_per_feature
    shared_channels = channels_per_feature[0]

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    # "box head" and "box predictor" are kept separate mainly for historical
    # reasons. They are used together, so the predictor layers should be part of
    # the head; new subclasses of ROIHeads do not need a "box predictor".
    pooled_shape = ShapeSpec(channels=shared_channels, height=resolution, width=resolution)
    self.box_head = build_box_head(cfg, pooled_shape)
    self.box_predictor = FastRCNNOutputLayers(
        self.box_head.output_size, self.num_classes, self.cls_agnostic_bbox_reg
    )
def __init__(self, cfg, input_shape):
    """Set up a ROI head that works on exactly one input feature.

    Builds the pooler, the res5 block with its box predictor and, when
    ``cfg.MODEL.MASK_ON`` is set, a mask head. Keypoints are not supported.

    Args:
        cfg: config object; ``MODEL.ROI_BOX_HEAD.POOLER_*``, ``MODEL.MASK_ON``
            and ``MODEL.KEYPOINT_ON`` are read.
        input_shape: mapping from feature name to an object exposing
            ``stride``; indexed with the single name in ``self.in_features``.
    """
    super().__init__(cfg, input_shape)

    assert len(self.in_features) == 1
    only_feature = self.in_features[0]

    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    kind = box_cfg.POOLER_TYPE
    scales = (1.0 / input_shape[only_feature].stride,)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    self.mask_on = cfg.MODEL.MASK_ON
    assert not cfg.MODEL.KEYPOINT_ON

    self.pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    self.res5, out_channels = self._build_res5_block(cfg)
    self.box_predictor = FastRCNNOutputLayers(
        out_channels, self.num_classes, self.cls_agnostic_bbox_reg
    )

    if not self.mask_on:
        return
    mask_input_shape = ShapeSpec(channels=out_channels, width=resolution, height=resolution)
    self.mask_head = build_mask_head(cfg, mask_input_shape)
def output_shape(self):
    """
    Build the shape description of every output feature.

    Returns:
        dict[str->ShapeSpec]: keyed by the names in ``self._out_features``; each
        value carries the channel count and stride recorded for that name.
    """

    def _spec_for(feature_name):
        return ShapeSpec(
            channels=self._out_feature_channels[feature_name],
            stride=self._out_feature_strides[feature_name],
        )

    # this is a backward-compatible default
    return {feature_name: _spec_for(feature_name) for feature_name in self._out_features}
def _init_box_head(self, cfg, input_shape):
    """Create the shared box pooler and the per-stage cascade components.

    For every cascade stage a box head, a box predictor and a box-to-box
    transform are created, plus a matcher for every stage but the first.

    Args:
        cfg: config object; ``MODEL.ROI_BOX_HEAD``, ``MODEL.ROI_BOX_CASCADE_HEAD``
            and ``MODEL.ROI_HEADS.IOU_THRESHOLDS`` are read.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``; indexed with ``self.in_features``.
    """
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    # One scale per input feature: the reciprocal of that feature's stride.
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    kind = box_cfg.POOLER_TYPE

    cascade_cfg = cfg.MODEL.ROI_BOX_CASCADE_HEAD
    stage_reg_weights = cascade_cfg.BBOX_REG_WEIGHTS
    stage_ious = cascade_cfg.IOUS

    self.num_cascade_stages = len(stage_ious)
    assert len(stage_reg_weights) == self.num_cascade_stages
    assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \
        "CascadeROIHeads only support class-agnostic regression now!"
    assert stage_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0]

    # Every input feature must have the same channel count.
    channels_per_feature = [input_shape[name].channels for name in self.in_features]
    assert len(set(channels_per_feature)) == 1, channels_per_feature
    shared_channels = channels_per_feature[0]

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=shared_channels, width=resolution, height=resolution)

    self.box_head = nn.ModuleList()
    self.box_predictor = nn.ModuleList()
    self.box2box_transform = []
    self.proposal_matchers = []
    # The two sequences were asserted to have equal length above, so zipping
    # them visits exactly ``self.num_cascade_stages`` stages.
    for stage_idx, (iou, reg_weights) in enumerate(zip(stage_ious, stage_reg_weights)):
        stage_head = build_box_head(cfg, pooled_shape)
        self.box_head.append(stage_head)
        self.box_predictor.append(
            FastRCNNOutputLayers(
                stage_head.output_size, self.num_classes, cls_agnostic_bbox_reg=True
            )
        )
        self.box2box_transform.append(Box2BoxTransform(weights=reg_weights))

        if stage_idx == 0:
            # The first matching is done by the matcher of ROIHeads
            # (self.proposal_matcher), so no dedicated matcher is stored.
            stage_matcher = None
        else:
            stage_matcher = Matcher([iou], [0, 1], allow_low_quality_matches=False)
        self.proposal_matchers.append(stage_matcher)
def build_backbone(cfg, input_shape=None):
    """
    Build a backbone from `cfg.MODEL.BACKBONE.NAME`.

    Args:
        cfg: config object; `MODEL.BACKBONE.NAME` selects the registry entry and
            `MODEL.PIXEL_MEAN` supplies the default channel count.
        input_shape: shape handed to the backbone builder. When None, a
            ShapeSpec whose channel count is `len(cfg.MODEL.PIXEL_MEAN)` is used.

    Returns:
        an instance of :class:`Backbone`
    """
    if input_shape is None:
        num_input_channels = len(cfg.MODEL.PIXEL_MEAN)
        input_shape = ShapeSpec(channels=num_input_channels)

    builder = BACKBONE_REGISTRY.get(cfg.MODEL.BACKBONE.NAME)
    model = builder(cfg, input_shape)
    assert isinstance(model, Backbone)
    return model
def _init_point_head(self, cfg):
    """Create the mask point head when it is enabled in the config.

    Always stores ``cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON`` in
    ``self.mask_point_on``. When that flag is falsy nothing else is created.

    Args:
        cfg: config object; ``MODEL.ROI_MASK_HEAD.POINT_HEAD_ON``,
            ``MODEL.ROI_HEADS.NUM_CLASSES`` and the ``MODEL.POINT_HEAD`` entries
            are read.

    Raises:
        AssertionError: if the ROI heads and the point head are configured with
            different numbers of classes.
    """
    # fmt: off
    self.mask_point_on = cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON
    if not self.mask_point_on:
        return
    assert cfg.MODEL.ROI_HEADS.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES
    self.mask_point_in_features             = cfg.MODEL.POINT_HEAD.IN_FEATURES
    self.mask_point_train_num_points        = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS
    self.mask_point_oversample_ratio        = cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO
    self.mask_point_importance_sample_ratio = cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO
    # The next two parameters are used in the adaptive subdivision inference procedure.
    self.mask_point_subdivision_steps       = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS
    self.mask_point_subdivision_num_points  = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS
    # fmt: on

    # The point head input has the summed channel count of all its input
    # features. np.sum returns a NumPy scalar (and a float for an empty list),
    # so convert it to a plain int before storing it in the ShapeSpec.
    in_channels = int(
        np.sum([self.feature_channels[f] for f in self.mask_point_in_features])
    )
    self.mask_point_head = build_point_head(
        cfg, ShapeSpec(channels=in_channels, width=1, height=1)
    )
def _init_mask_head(self, cfg):
    """Create the coarse mask head and then the point head, if masks are enabled.

    Always stores ``cfg.MODEL.MASK_ON`` in ``self.mask_on``. When that flag is
    falsy nothing else is created and the point head is not initialised.

    Args:
        cfg: config object; ``MODEL.MASK_ON`` and the ``MODEL.ROI_MASK_HEAD``
            entries ``IN_FEATURES`` and ``POOLER_RESOLUTION`` are read.
    """
    # fmt: off
    self.mask_on = cfg.MODEL.MASK_ON
    if not self.mask_on:
        return
    self.mask_coarse_in_features = cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES
    self.mask_coarse_side_size   = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    # fmt: on

    # The coarse head input has the summed channel count of all its input
    # features. np.sum returns a NumPy scalar (and a float for an empty list),
    # so convert it to a plain int before storing it in the ShapeSpec.
    in_channels = int(
        np.sum([self.feature_channels[f] for f in self.mask_coarse_in_features])
    )
    self.mask_coarse_head = build_mask_head(
        cfg,
        ShapeSpec(
            channels=in_channels,
            width=self.mask_coarse_side_size,
            height=self.mask_coarse_side_size,
        ),
    )
    self._init_point_head(cfg)
def test_rrpn_anchor_generator(self):
    """RotatedAnchorGenerator produces the expected five-column anchors.

    One feature map with spatial size 1x2 at stride 4 is used, together with two
    sizes, three aspect ratios and two angles, so twelve anchors are expected at
    each of the two locations.
    """
    stride = 4
    batch_size = 2

    cfg = get_cfg()
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]]
    cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 45]]
    generator = RotatedAnchorGenerator(cfg, [ShapeSpec(stride=stride)])

    # Only the last two dimensions of the feature tensor matter here.
    features = {"stage3": torch.rand(batch_size, 96, 1, 2)}
    anchors = generator([features["stage3"]])

    # Anchors expected at the first location of the feature map.
    first_cell = torch.tensor(
        [
            [0.0, 0.0, 64.0, 16.0, 0.0],
            [0.0, 0.0, 64.0, 16.0, 45.0],
            [0.0, 0.0, 32.0, 32.0, 0.0],
            [0.0, 0.0, 32.0, 32.0, 45.0],
            [0.0, 0.0, 16.0, 64.0, 0.0],
            [0.0, 0.0, 16.0, 64.0, 45.0],
            [0.0, 0.0, 128.0, 32.0, 0.0],
            [0.0, 0.0, 128.0, 32.0, 45.0],
            [0.0, 0.0, 64.0, 64.0, 0.0],
            [0.0, 0.0, 64.0, 64.0, 45.0],
            [0.0, 0.0, 32.0, 128.0, 0.0],
            [0.0, 0.0, 32.0, 128.0, 45.0],
        ]
    )
    # The second location holds the same anchors with only the first column
    # moved by one stride: 4.0 == 0.0 + STRIDE (4).
    x_shift = torch.tensor([float(stride), 0.0, 0.0, 0.0, 0.0])
    expected = torch.cat([first_cell, first_cell + x_shift])

    for image_idx in range(batch_size):
        assert torch.allclose(anchors[image_idx][0].tensor, expected)
def _init_mask_head(self, cfg, input_shape):
    """Create the coarse mask head and then the point head, if masks are enabled.

    Always stores ``cfg.MODEL.MASK_ON`` in ``self.mask_on``. When that flag is
    falsy nothing else is created and the point head is not initialised.

    Args:
        cfg: config object; ``MODEL.MASK_ON`` and the ``MODEL.ROI_MASK_HEAD``
            entries ``IN_FEATURES`` and ``POOLER_RESOLUTION`` are read.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``. Every entry contributes a scale to
            ``self._feature_scales``.
    """
    # fmt: off
    self.mask_on = cfg.MODEL.MASK_ON
    if not self.mask_on:
        return
    self.mask_coarse_in_features = cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES
    self.mask_coarse_side_size   = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    # Scale of each feature is the reciprocal of its stride.
    self._feature_scales         = {k: 1.0 / v.stride for k, v in input_shape.items()}
    # fmt: on

    # The coarse head input has the summed channel count of all its input
    # features. np.sum returns a NumPy scalar (and a float for an empty list),
    # so convert it to a plain int before storing it in the ShapeSpec.
    in_channels = int(
        np.sum([input_shape[f].channels for f in self.mask_coarse_in_features])
    )
    self.mask_coarse_head = build_mask_head(
        cfg,
        ShapeSpec(
            channels=in_channels,
            width=self.mask_coarse_side_size,
            height=self.mask_coarse_side_size,
        ),
    )
    self._init_point_head(cfg, input_shape)
def _init_box_head(self, cfg, input_shape):
    """Create the box pooler, box head and box predictor for 5-dimensional boxes.

    Only the "ROIAlignRotated" pooler type is accepted, and the predictor is
    built with ``box_dim=5``.

    Args:
        cfg: config object; the ``MODEL.ROI_BOX_HEAD.POOLER_*`` entries are read.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``; indexed with ``self.in_features``.
    """
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    # One scale per input feature: the reciprocal of that feature's stride.
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    kind = box_cfg.POOLER_TYPE

    # When the heads run on several feature maps (as in FPN) the same predictors
    # are shared between them, so every map must have the same channel count.
    channels_per_feature = [input_shape[name].channels for name in self.in_features]
    assert len(set(channels_per_feature)) == 1, channels_per_feature
    shared_channels = channels_per_feature[0]

    assert kind in ("ROIAlignRotated",)

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=shared_channels, height=resolution, width=resolution)
    self.box_head = build_box_head(cfg, pooled_shape)
    self.box_predictor = FastRCNNOutputLayers(
        input_size=self.box_head.output_size,
        num_classes=self.num_classes,
        cls_agnostic_bbox_reg=self.cls_agnostic_bbox_reg,
        box_dim=5,
    )