def __init__(self, cfg):
    """Read the BlendMask settings from ``cfg`` and build two ROI poolers.

    Args:
        cfg: config node; ``MODEL.BLENDMASK`` and ``MODEL.BASIS_MODULE``
            are read here.
    """
    blend_cfg = cfg.MODEL.BLENDMASK
    self.pooler_resolution = blend_cfg.BOTTOM_RESOLUTION
    ratio = blend_cfg.POOLER_SAMPLING_RATIO
    kind = blend_cfg.POOLER_TYPE
    level_scales = blend_cfg.POOLER_SCALES
    self.attn_size = blend_cfg.ATTN_SIZE
    self.top_interp = blend_cfg.TOP_INTERP
    basis_count = cfg.MODEL.BASIS_MODULE.NUM_BASES

    # attn_len = number of bases times attn_size squared.
    self.attn_len = basis_count * self.attn_size * self.attn_size

    # Both poolers share every setting except their output size.
    shared_kwargs = dict(
        scales=level_scales,
        sampling_ratio=ratio,
        pooler_type=kind,
        canonical_level=2,
    )
    self.pooler = ROIPooler(output_size=self.pooler_resolution, **shared_kwargs)
    # Second pooler works at a quarter of the bottom resolution.
    self.pooler_p5 = ROIPooler(
        output_size=self.pooler_resolution // 4, **shared_kwargs
    )
def from_config(cls, cfg, input_shape):
    """Collect the constructor arguments for this ROI head from ``cfg``.

    Starts from the dict returned by the parent ``from_config`` and adds the
    input feature names, the box and mask ROI poolers, the box head, the box
    predictor and the mask head.

    Args:
        cfg: config node; ``MODEL.ROI_HEADS``, ``MODEL.ROI_BOX_HEAD`` and
            ``MODEL.ROI_MASK_HEAD`` are read here.
        input_shape: mapping from feature name to an object exposing a
            ``stride`` attribute.

    Returns:
        dict: the parent's entries plus ``in_features``, ``box_pooler``,
        ``mask_pooler``, ``box_head``, ``box_predictor`` and ``mask_head``.
    """
    ret = super().from_config(cfg)
    in_features = ret["in_features"] = cfg.MODEL.ROI_HEADS.IN_FEATURES
    pooler_scales = [1.0 / input_shape[lvl].stride for lvl in in_features]

    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    ret["box_pooler"] = ROIPooler(
        output_size=box_cfg.POOLER_RESOLUTION,
        scales=pooler_scales,
        sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
        pooler_type=box_cfg.POOLER_TYPE,
    )

    # The mask pooler takes all of its settings from ROI_MASK_HEAD. Its
    # sampling ratio must not be borrowed from the box head, otherwise
    # changing ROI_MASK_HEAD.POOLER_SAMPLING_RATIO has no effect.
    mask_cfg = cfg.MODEL.ROI_MASK_HEAD
    ret["mask_pooler"] = ROIPooler(
        output_size=mask_cfg.POOLER_RESOLUTION,
        scales=pooler_scales,
        sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
        pooler_type=mask_cfg.POOLER_TYPE,
    )

    ret["box_head"] = BboxNetwork(cfg)
    # The predictor is built for a 1024-channel, 1x1 input.
    ret["box_predictor"] = FastRCNNOutputLayers(
        cfg, ShapeSpec(channels=1024, height=1, width=1)
    )
    # Mask Network + Head
    ret["mask_head"] = MaskNetwork(cfg.MODEL.ROI_HEADS.NUM_CLASSES)
    return ret
def test_no_images(self):
    """Pooling a zero-image batch with no boxes gives a (0, C, 14, 14) output."""
    batch, channels, height, width = 0, 32, 32, 32
    out_size = 14
    empty_feature = torch.rand(batch, channels, height, width) - 0.5

    pooler = ROIPooler(
        output_size=out_size,
        scales=(1.0,),
        sampling_ratio=0.0,
        pooler_type="ROIAlignV2",
    )
    pooled = pooler.forward([empty_feature], [])

    self.assertEqual(pooled.shape, (0, channels, out_size, out_size))
def _test_roialignv2_roialignrotated_match(self, device):
    """Check that "ROIAlignV2" and "ROIAlignRotated" pooling agree.

    Random boxes are generated per image and converted to 5-column rows
    whose last column (left at zero) is never written; both poolers must
    then produce outputs equal within ``atol=1e-4``.

    Args:
        device: device the features and boxes are moved to.
    """
    out_size = 14
    level = 4
    stride = 2**level
    scales = (1.0 / stride,)
    ratio = 0

    n_images, n_channels, feat_h, feat_w = 2, 4, 10, 8
    boxes_per_image = 10
    std, mean = 11, 0
    feature = (torch.rand(n_images, n_channels, feat_h, feat_w) - 0.5) * 2 * std + mean
    features = [feature.to(device)]

    rois, rois_rotated = [], []
    for _ in range(n_images):
        plain = self._rand_boxes(
            num_boxes=boxes_per_image, x_max=feat_w * stride, y_max=feat_h * stride
        )
        # Columns 0/1 hold the midpoints and 2/3 the extents of the plain
        # boxes; column 4 stays zero.
        rotated = torch.zeros(boxes_per_image, 5)
        rotated[:, 0] = (plain[:, 0] + plain[:, 2]) / 2.0
        rotated[:, 1] = (plain[:, 1] + plain[:, 3]) / 2.0
        rotated[:, 2] = plain[:, 2] - plain[:, 0]
        rotated[:, 3] = plain[:, 3] - plain[:, 1]
        rois.append(Boxes(plain).to(device))
        rois_rotated.append(RotatedBoxes(rotated).to(device))

    def pool_with(pooler_type, box_lists):
        # Build a pooler of the requested type and run it on the shared features.
        pooler = ROIPooler(
            output_size=out_size,
            scales=scales,
            sampling_ratio=ratio,
            pooler_type=pooler_type,
        )
        return pooler(features, box_lists)

    aligned_out = pool_with("ROIAlignV2", rois)
    rotated_out = pool_with("ROIAlignRotated", rois_rotated)

    self.assertTrue(torch.allclose(aligned_out, rotated_out, atol=1e-4))
def _init_box_head(cls, cfg, input_shape):
    """Build the box pooler, box head and WSDDN predictor described by ``cfg``.

    Args:
        cfg: config node; ``MODEL.ROI_HEADS.IN_FEATURES`` and
            ``MODEL.ROI_BOX_HEAD`` are read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict: ``box_in_features``, ``box_pooler``, ``box_head`` and
        ``box_predictor``.
    """
    feature_names = cfg.MODEL.ROI_HEADS.IN_FEATURES
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in feature_names)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    kind = box_cfg.POOLER_TYPE

    # The same head is applied to every input feature map (as in FPN), so
    # all of them must have the same number of channels.
    channel_counts = [input_shape[name].channels for name in feature_names]
    assert len(set(channel_counts)) == 1, channel_counts
    channels = channel_counts[0]

    pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    # "Box head" and "box predictor" are kept as separate modules for
    # historical reasons even though they are always used together.
    head = build_box_head(
        cfg, ShapeSpec(channels=channels, height=resolution, width=resolution)
    )
    predictor = WSDDNOutputLayers(cfg, head.output_shape)

    return {
        "box_in_features": feature_names,
        "box_pooler": pooler,
        "box_head": head,
        "box_predictor": predictor,
    }
def __init__(self, cfg, input_shape):
    """Build the pooler, box head and linear classifier of this head.

    Args:
        cfg: config node; ``MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD`` and
            ``MODEL.ROI_HEADS.NUM_CLASSES`` are read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.
    """
    super().__init__()

    head_cfg = cfg.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD
    feature_names = head_cfg.IN_FEATURES
    resolution = head_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in feature_names)
    ratio = head_cfg.POOLER_SAMPLING_RATIO
    kind = head_cfg.POOLER_TYPE
    # Only the first feature's channel count is used to size the head.
    channels = [input_shape[name].channels for name in feature_names][0]

    self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
    self.box_in_features = feature_names

    self.meta_box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    self.meta_box_head = build_box_head(
        cfg, ShapeSpec(channels=channels, height=resolution, width=resolution)
    )

    head_out = self.meta_box_head.output_shape
    if isinstance(head_out, int):
        # Backward compatibility: a bare int is treated as a channel count.
        head_out = ShapeSpec(channels=head_out)
    # Flattened feature size; missing width/height count as 1.
    flat_dim = head_out.channels * (head_out.width or 1) * (head_out.height or 1)

    self.input_size = flat_dim
    self.pi_normalizer = 0.5 * flat_dim * np.log(2 * np.pi)

    # Linear classifier with num_classes + 1 outputs.
    classifier = Linear(flat_dim, self.num_classes + 1)
    self.rank_loss_classifier = classifier
    nn.init.normal_(classifier.weight, std=0.01)
    nn.init.constant_(classifier.bias, 0.0)
def _init_dp_keypoint_head(self, cfg,input_shape):
    """Set up the DensePose keypoint branch.

    Records ``cfg.MODEL.ROI_DENSEPOSE_HEAD.KPT_ON`` in ``self.dp_keypoint_on``
    and returns immediately when it is false. Otherwise the keypoint loss
    settings are stored, a DensePose pooler and head are built if
    ``self.densepose_on`` is false, and the keypoint predictor is created on
    top of ``self.densepose_head``.

    Args:
        cfg: config node; ``MODEL.ROI_DENSEPOSE_HEAD`` and
            ``MODEL.ROI_KEYPOINT_HEAD`` are read here.
        input_shape: not used by this method.

    NOTE(review): the nesting under ``if not self.densepose_on`` is inferred
    (the pooler and head are assumed to be created only in that branch, the
    predictor always) -- confirm against the original layout.
    """
    # fmt: off
    self.dp_keypoint_on = cfg.MODEL.ROI_DENSEPOSE_HEAD.KPT_ON
    if not self.dp_keypoint_on:
        return
    self.normalize_loss_by_visible_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS  # noqa
    self.keypoint_loss_weight = cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT
    dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
    # dp_multi_pooler_res = ((28,28),(14,14),(14,14),(7,7))
    # One scale per input feature: the reciprocal of its stride.
    dp_pooler_scales = tuple(1.0 / self.feature_strides[k] for k in self.in_features)
    dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
    dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
    # Only the first feature's channel count is used.
    in_channels = [self.feature_channels[f] for f in self.in_features][0]
    # presumably the pooler/head already exist when densepose_on is true -- TODO confirm
    if not self.densepose_on:
        self.use_mid = cfg.MODEL.ROI_DENSEPOSE_HEAD.MID_ON
        if cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME == 'DensePoseAMAHead':
            # This head name gets a multi-resolution pooler with one
            # hard-coded output size per entry.
            self.densepose_pooler = MultiROIPooler(
                output_size=[[28, 28], [14, 14], [14, 14], [7, 7]],
                # output_size=[[28, 28], [28, 28], [28, 28], [28, 28]],
                scales=dp_pooler_scales,
                sampling_ratio=dp_pooler_sampling_ratio,
                pooler_type=dp_pooler_type,
            )
        else:
            self.densepose_pooler = ROIPooler(
                output_size=dp_pooler_resolution,
                scales=dp_pooler_scales,
                sampling_ratio=dp_pooler_sampling_ratio,
                pooler_type=dp_pooler_type,
            )
        self.densepose_head = build_densepose_head(cfg, in_channels)
        # print(self.densepose_head)
    # The predictor is sized from the DensePose head's output channels.
    self.keypoint_predictor = DensePoseKeypointsPredictor(cfg, self.densepose_head.n_out_channels)
def _init_box_head(self, cfg, input_shape):
    """Create the box pooler, box head, box predictor and optional attribute predictor.

    Args:
        cfg: config node; ``MODEL.ROI_BOX_HEAD`` and ``MODEL.ATTRIBUTE_ON``
            are read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.
    """
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    kind = box_cfg.POOLER_TYPE
    self.train_on_pred_boxes = box_cfg.TRAIN_ON_PRED_BOXES
    self.attribute_on = cfg.MODEL.ATTRIBUTE_ON

    # All input feature maps must have the same number of channels.
    channel_counts = [input_shape[name].channels for name in self.in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    channels = channel_counts[0]

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=channels, height=resolution, width=resolution)
    self.box_head = build_box_head(cfg, pooled_shape)
    self.box_predictor = FastRCNNOutputLayers(cfg, self.box_head.output_shape)

    if self.attribute_on:
        # The attribute predictor is sized from the box head's output channels.
        self.attribute_predictor = AttributePredictor(
            cfg, self.box_head.output_shape.channels
        )
def _init_keypoint_head(self, cfg):
    """Set up the keypoint pooler and head when ``cfg.MODEL.KEYPOINT_ON`` is set.

    The flag is always stored in ``self.keypoint_on``; nothing else is
    created when it is false.

    Args:
        cfg: config node; ``MODEL.KEYPOINT_ON`` and
            ``MODEL.ROI_KEYPOINT_HEAD`` are read here.
    """
    self.keypoint_on = cfg.MODEL.KEYPOINT_ON
    if not self.keypoint_on:
        return

    kpt_cfg = cfg.MODEL.ROI_KEYPOINT_HEAD
    resolution = kpt_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / self.feature_strides[name] for name in self.in_features)
    ratio = kpt_cfg.POOLER_SAMPLING_RATIO
    kind = kpt_cfg.POOLER_TYPE
    self.normalize_loss_by_visible_keypoints = kpt_cfg.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS
    self.keypoint_loss_weight = kpt_cfg.LOSS_WEIGHT

    # Only the first feature's channel count is used to size the head.
    channels = [self.feature_channels[name] for name in self.in_features][0]

    self.keypoint_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=channels, width=resolution, height=resolution)
    self.keypoint_head = build_keypoint_head(cfg, pooled_shape)
def _init_box_head(self, cfg, input_shape):
    """Create the box pooler and the box head.

    Args:
        cfg: config node; ``MODEL.ROI_BOX_HEAD`` is read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.
    """
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    kind = box_cfg.POOLER_TYPE
    self.train_on_pred_boxes = box_cfg.TRAIN_ON_PRED_BOXES

    # The head is shared across all input feature maps (as in FPN), so
    # their channel counts have to agree.
    channel_counts = [input_shape[name].channels for name in self.in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    channels = channel_counts[0]

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=channels, height=resolution, width=resolution)
    # This builder also receives the class count and the class-agnostic flag.
    self.box_head = build_box_head(
        cfg, pooled_shape, self.num_classes, self.cls_agnostic_bbox_reg
    )
def _init_mask_head(cfg, input_shape):
    """Build the mask pooler (if a pooler type is configured) and the mask head.

    Args:
        cfg: config node; ``MODEL.ROI_HEADS.IN_FEATURES`` and
            ``MODEL.ROI_MASK_HEAD`` are read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        tuple: ``(mask_pooler, mask_head)``; ``mask_pooler`` is ``None`` when
        ``MODEL.ROI_MASK_HEAD.POOLER_TYPE`` is falsy.
    """
    feature_names = cfg.MODEL.ROI_HEADS.IN_FEATURES
    mask_cfg = cfg.MODEL.ROI_MASK_HEAD
    resolution = mask_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in feature_names)
    ratio = mask_cfg.POOLER_SAMPLING_RATIO
    kind = mask_cfg.POOLER_TYPE
    channels = [input_shape[name].channels for name in feature_names][0]

    if not kind:
        # Without a pooler the head is given the per-feature input shapes.
        per_feature = {name: input_shape[name] for name in feature_names}
        return None, build_mask_head(cfg, per_feature)

    pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=channels, width=resolution, height=resolution)
    return pooler, build_mask_head(cfg, pooled_shape)
def _init_keypoint_head(cls, cfg, input_shape):
    """Build the keypoint pooler and head when keypoints are enabled.

    Args:
        cfg: config node; ``MODEL.KEYPOINT_ON``, ``MODEL.ROI_HEADS`` and
            ``MODEL.ROI_KEYPOINT_HEAD`` are read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict: empty when ``cfg.MODEL.KEYPOINT_ON`` is false, otherwise
        ``keypoint_in_features``, ``keypoint_pooler`` and ``keypoint_head``.
    """
    if not cfg.MODEL.KEYPOINT_ON:
        return {}

    feature_names = cfg.MODEL.ROI_HEADS.IN_FEATURES
    kpt_cfg = cfg.MODEL.ROI_KEYPOINT_HEAD
    resolution = kpt_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in feature_names)
    ratio = kpt_cfg.POOLER_SAMPLING_RATIO
    kind = kpt_cfg.POOLER_TYPE

    # Only the first feature's channel count is used to size the head.
    channels = [input_shape[name].channels for name in feature_names][0]
    pooled_shape = ShapeSpec(channels=channels, width=resolution, height=resolution)

    return {
        "keypoint_in_features": feature_names,
        "keypoint_pooler": ROIPooler(
            output_size=resolution,
            scales=scales,
            sampling_ratio=ratio,
            pooler_type=kind,
        ),
        "keypoint_head": build_keypoint_head(cfg, pooled_shape),
    }
def _init_z_head(self, cfg):
    """Set up the z pooler and head when ``cfg.MODEL.ZPRED_ON`` is set.

    The flag is always stored in ``self.zpred_on``; nothing else is created
    when it is false.

    Args:
        cfg: config node; ``MODEL.ZPRED_ON`` and ``MODEL.ROI_Z_HEAD`` are
            read here.
    """
    self.zpred_on = cfg.MODEL.ZPRED_ON
    if not self.zpred_on:
        return

    z_cfg = cfg.MODEL.ROI_Z_HEAD
    resolution = z_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / self.feature_strides[name] for name in self.in_features)
    ratio = z_cfg.POOLER_SAMPLING_RATIO
    kind = z_cfg.POOLER_TYPE

    self.z_loss_weight = z_cfg.Z_REG_WEIGHT
    self.z_smooth_l1_beta = z_cfg.SMOOTH_L1_BETA

    # Only the first feature's channel count is used to size the head.
    channels = [self.feature_channels[name] for name in self.in_features][0]

    self.z_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    self.z_head = build_z_head(
        cfg, ShapeSpec(channels=channels, width=resolution, height=resolution)
    )
def _init_box_head(self, cfg):
    """Create the box pooler, head and predictor, plus adaptive-pooling metadata.

    Args:
        cfg: config node; ``MODEL.ROI_BOX_HEAD`` is read here.
    """
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    # Fall back to the head-wide feature list when none is given for the box head.
    self.in_features_box = box_cfg.IN_FEATURES or self.in_features
    resolution = box_cfg.POOLER_RESOLUTION
    kind = box_cfg.POOLER_TYPE
    self.box_pooler_scales = [
        1.0 / self.feature_strides[name] for name in self.in_features_box
    ]
    ratio = box_cfg.POOLER_SAMPLING_RATIO

    # The box features must be a non-empty subset of the available features.
    requested = set(self.in_features_box)
    assert requested and requested <= set(self.in_features)

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=self.box_pooler_scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    # "Box head" and "box predictor" are separate modules for historical
    # reasons even though they are always used together.
    pooled_shape = ShapeSpec(
        channels=self.in_channels, height=resolution, width=resolution
    )
    self.box_head = build_box_head(cfg, pooled_shape)
    self.box_predictor = FastRCNNOutputLayers(
        self.box_head.output_size, self.num_classes, self.cls_agnostic_bbox_reg
    )

    # FOR ADAPTIVE POOLING: levels are -log2 of the first/last scale.
    first_scale = self.box_pooler_scales[0]
    last_scale = self.box_pooler_scales[-1]
    self.box_min_level = int(-math.log2(first_scale))
    self.box_max_level = int(-math.log2(last_scale))
    self.box_canonical_box_size = self.box_pooler.canonical_box_size
    self.box_canonical_level = self.box_pooler.canonical_level
def _test_scriptability(self, device):
    """Check that a scripted "ROIAlignV2" pooler reproduces the eager output exactly.

    Args:
        device: device the features and boxes are moved to.
    """
    out_size = 14
    level = 4
    stride = 2**level
    scales = (1.0 / stride,)
    ratio = 0

    n_images, n_channels, feat_h, feat_w = 2, 4, 10, 8
    boxes_per_image = 10
    std, mean = 11, 0
    feature = (torch.rand(n_images, n_channels, feat_h, feat_w) - 0.5) * 2 * std + mean
    features = [feature.to(device)]

    # One random box list per image.
    rois = [
        Boxes(random_boxes(boxes_per_image, feat_w * stride)).to(device)
        for _ in range(n_images)
    ]

    pooler = ROIPooler(
        output_size=out_size,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type="ROIAlignV2",
    )
    eager_out = pooler(features, rois)
    scripted_out = torch.jit.script(pooler)(features, rois)

    self.assertTrue(torch.equal(eager_out, scripted_out))
def _init_shared_block(self, cfg):
    """Create the shared pooler and shared block, plus adaptive-pooling metadata.

    Args:
        cfg: config node; ``MODEL.ROI_SHARED_HEAD`` is read here.
    """
    shared_cfg = cfg.MODEL.ROI_SHARED_HEAD
    # Fall back to the head-wide feature list when none is given for the shared head.
    self.in_features_shared = shared_cfg.IN_FEATURES or self.in_features
    self.shared_pooler_resolution = shared_cfg.POOLER_RESOLUTION
    kind = shared_cfg.POOLER_TYPE
    self.shared_pooler_scales = [
        1.0 / self.feature_strides[name] for name in self.in_features_shared
    ]
    ratio = shared_cfg.POOLER_SAMPLING_RATIO

    # The shared features must be a non-empty subset of the available features.
    requested = set(self.in_features_shared)
    assert requested and requested <= set(self.in_features)

    self.shared_pooler = ROIPooler(
        output_size=self.shared_pooler_resolution,
        scales=self.shared_pooler_scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    self.shared_block, self.shared_out_channels = build_shared_block(
        cfg, self.in_channels
    )

    # FOR ADAPTIVE POOLING: levels are -log2 of the first/last scale.
    first_scale = self.shared_pooler_scales[0]
    last_scale = self.shared_pooler_scales[-1]
    self.shared_min_level = int(-math.log2(first_scale))
    self.shared_max_level = int(-math.log2(last_scale))
    self.shared_canonical_box_size = self.shared_pooler.canonical_box_size
    self.shared_canonical_level = self.shared_pooler.canonical_level
def _init_dp_keypoint_head(self, cfg, input_shape):
    """Set up the DensePose keypoint branch.

    Records ``cfg.MODEL.ROI_DENSEPOSE_HEAD.KPT_ON`` in ``self.dp_keypoint_on``
    and returns immediately when it is false. Otherwise the keypoint loss
    settings are stored, a DensePose pooler and head (and optionally a
    multi-instance decoder) are built if ``self.densepose_on`` is false, and
    the keypoint predictor is created on top of ``self.densepose_head``.

    Args:
        cfg: config node; ``MODEL.ROI_DENSEPOSE_HEAD`` and
            ``MODEL.ROI_KEYPOINT_HEAD`` are read here.
        input_shape: mapping from feature name to an object exposing
            ``stride``; also passed on to ``MultiInstanceDecoder``.

    NOTE(review): the nesting under ``if not self.densepose_on`` is inferred
    (the pooler and head are assumed to be created only in that branch, the
    predictor always) -- confirm against the original layout.
    """
    # fmt: off
    self.dp_keypoint_on = cfg.MODEL.ROI_DENSEPOSE_HEAD.KPT_ON
    if not self.dp_keypoint_on:
        return
    self.normalize_loss_by_visible_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS  # noqa
    self.keypoint_loss_weight = cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT
    dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
    # One scale per input feature: the reciprocal of its stride.
    dp_pooler_scales = tuple(1.0 / self.feature_strides[k] for k in self.in_features)
    dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
    dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
    # Only the first feature's channel count is used.
    in_channels = [self.feature_channels[f] for f in self.in_features][0]
    # presumably the pooler/head already exist when densepose_on is true -- TODO confirm
    if not self.densepose_on:
        self.use_mid = cfg.MODEL.ROI_DENSEPOSE_HEAD.MID_ON
        if self.use_mid:
            self.mid_decoder = MultiInstanceDecoder(
                cfg, input_shape, self.in_features)
            # With the decoder enabled, pooling uses only the first
            # feature's scale.
            dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride, )
        self.densepose_pooler = ROIPooler(
            output_size=dp_pooler_resolution,
            scales=dp_pooler_scales,
            sampling_ratio=dp_pooler_sampling_ratio,
            pooler_type=dp_pooler_type,
        )
        self.densepose_head = build_densepose_head(cfg, in_channels)
    # The predictor is sized from the DensePose head's output channels.
    self.keypoint_predictor = DensePoseKeypointsPredictor(
        cfg, self.densepose_head.n_out_channels)
def __init__(self, cfg, input_shape):
    """Build the single-feature pooler, res5 block and BUA box predictor.

    Args:
        cfg: config node; ``MODEL.ROI_BOX_HEAD`` and ``MODEL.BUA`` are read
            here.
        input_shape: forwarded unchanged to the parent constructor.
    """
    super().__init__(cfg, input_shape)

    # This head pools from exactly one feature map.
    assert len(self.in_features) == 1

    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    kind = box_cfg.POOLER_TYPE
    scales = (1.0 / self.feature_strides[self.in_features[0]],)
    ratio = box_cfg.POOLER_SAMPLING_RATIO

    bua_cfg = cfg.MODEL.BUA
    self.attr_on = bua_cfg.ATTRIBUTE_ON
    self.extract_on = bua_cfg.EXTRACT_FEATS
    self.num_attr_classes = bua_cfg.ATTRIBUTE.NUM_CLASSES

    self.pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    self.box2box_transform = BUABox2BoxTransform(
        weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS
    )

    # The predictor is sized from the res5 block's output channels.
    self.res5, res5_channels = self._build_res5_block(cfg)
    self.box_predictor = BUACaffeFastRCNNOutputLayers(
        res5_channels,
        self.num_classes,
        self.cls_agnostic_bbox_reg,
        attr_on=self.attr_on,
        num_attr_classes=self.num_attr_classes,
    )
def __init__(self, cfg, use_rel_coords=True, input_shape=None):
    """Initialise the parent and build the single-level DensePose ROI pooler.

    The pooler output size comes from
    ``cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE``, the sampling ratio is
    fixed to 0, and only the first entry of
    ``cfg.MODEL.ROI_HEADS.IN_FEATURES`` contributes a scale.

    Args:
        cfg: config node; ``MODEL.ROI_HEADS.IN_FEATURES`` and
            ``MODEL.ROI_DENSEPOSE_HEAD`` are read here.
        use_rel_coords: forwarded unchanged to the parent constructor.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``. Required despite the ``None``
            default.

    Raises:
        TypeError: if ``input_shape`` is ``None``.
    """
    if input_shape is None:
        # The pooler scale cannot be computed without the feature shapes.
        # Fail up front with a clear message instead of the opaque
        # "'NoneType' object is not subscriptable" raised further down.
        raise TypeError(
            "input_shape is required to configure the DensePose ROI pooler"
        )

    super().__init__(cfg, use_rel_coords)
    self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES

    dp_cfg = cfg.MODEL.ROI_DENSEPOSE_HEAD
    # The pooled output is produced at heat-map size with a sampling ratio
    # of 0, not at the configured POOLER_RESOLUTION / POOLER_SAMPLING_RATIO.
    dp_pooler_resolution = dp_cfg.HEATMAP_SIZE
    dp_pooler_sampling_ratio = 0
    dp_pooler_type = dp_cfg.POOLER_TYPE

    # Pooling always uses the first input feature only.
    dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
    # NOTE(review): in_channels is not used further in the visible part of
    # this constructor; it is still evaluated so that a feature name missing
    # from input_shape fails here.
    in_channels = [input_shape[f].channels for f in self.in_features][0]

    self.densepose_pooler = ROIPooler(
        output_size=dp_pooler_resolution,
        scales=dp_pooler_scales,
        sampling_ratio=dp_pooler_sampling_ratio,
        pooler_type=dp_pooler_type,
    )
def _init_box_head(cls, cfg, input_shape):
    """Build the box pooler and the ROI predictor described by ``cfg``.

    Args:
        cfg: config node; ``MODEL.ROI_HEADS.IN_FEATURES`` and
            ``MODEL.ROI_BOX_HEAD`` are read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict: ``box_in_features``, ``box_pooler`` and ``box_predictor``.
    """
    feature_names = cfg.MODEL.ROI_HEADS.IN_FEATURES
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in feature_names)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    kind = box_cfg.POOLER_TYPE

    # The predictor is shared across all input feature maps (as in FPN),
    # so their channel counts have to agree.
    channel_counts = [input_shape[name].channels for name in feature_names]
    assert len(set(channel_counts)) == 1, channel_counts
    channels = channel_counts[0]

    pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=channels, height=resolution, width=resolution)
    predictor = build_roi_predictor(cfg, pooled_shape)

    return {
        "box_in_features": feature_names,
        "box_pooler": pooler,
        "box_predictor": predictor,
    }
def _init_hoi_head(self, cfg, input_shape):
    """Set up the HOI pooler, head and predictor when ``cfg.MODEL.HOI_ON`` is set.

    The flag is always stored in ``self.hoi_on``; nothing else is created
    when it is false.

    Args:
        cfg: config node; ``MODEL.HOI_ON`` and ``MODEL.HOI_BOX_HEAD`` are
            read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.
    """
    self.hoi_on = cfg.MODEL.HOI_ON
    if not self.hoi_on:
        return

    hoi_cfg = cfg.MODEL.HOI_BOX_HEAD
    resolution = hoi_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = hoi_cfg.POOLER_SAMPLING_RATIO
    kind = hoi_cfg.POOLER_TYPE
    self.allow_person_to_person = hoi_cfg.ALLOW_PERSON_TO_PERSON

    # The predictors are shared across all input feature maps (as in FPN),
    # so their channel counts have to agree.
    channel_counts = [input_shape[name].channels for name in self.in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    channels = channel_counts[0]

    self.hoi_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=channels, height=resolution, width=resolution)
    self.hoi_head = build_hoi_head(cfg, pooled_shape)
    self.hoi_predictor = HoiOutputLayers(cfg, self.hoi_head.output_shape)
def _init_mask_head(self, cfg, input_shape):
    """Set up the mask pooler and head when ``cfg.MODEL.MASK_ON`` is set.

    The flag is always stored in ``self.mask_on``; nothing else is created
    when it is false.

    Args:
        cfg: config node; ``MODEL.MASK_ON`` and ``MODEL.ROI_MASK_HEAD`` are
            read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.
    """
    self.mask_on = cfg.MODEL.MASK_ON
    if not self.mask_on:
        return

    mask_cfg = cfg.MODEL.ROI_MASK_HEAD
    resolution = mask_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = mask_cfg.POOLER_SAMPLING_RATIO
    kind = mask_cfg.POOLER_TYPE

    # Only the first feature's channel count is used to size the head.
    channels = [input_shape[name].channels for name in self.in_features][0]

    self.mask_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=channels, width=resolution, height=resolution)
    self.mask_head = build_mask_head(cfg, pooled_shape)
def _init_mesh_head(self, cfg):
    """Set up the mesh pooler, head and loss settings when ``cfg.MODEL.MESH_ON`` is set.

    The flag is always stored in ``self.mesh_on``; nothing else is created
    when it is false.

    Args:
        cfg: config node; ``MODEL.MESH_ON`` and ``MODEL.ROI_MESH_HEAD`` are
            read here.
    """
    self.mesh_on = cfg.MODEL.MESH_ON
    if not self.mesh_on:
        return

    mesh_cfg = cfg.MODEL.ROI_MESH_HEAD
    resolution = mesh_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / self.feature_strides[name] for name in self.in_features)
    ratio = mesh_cfg.POOLER_SAMPLING_RATIO
    kind = mesh_cfg.POOLER_TYPE

    # Loss weights and sampling settings, copied from the config as-is.
    self.chamfer_loss_weight = mesh_cfg.CHAMFER_LOSS_WEIGHT
    self.normals_loss_weight = mesh_cfg.NORMALS_LOSS_WEIGHT
    self.edge_loss_weight = mesh_cfg.EDGE_LOSS_WEIGHT
    self.gt_num_samples = mesh_cfg.GT_NUM_SAMPLES
    self.pred_num_samples = mesh_cfg.PRED_NUM_SAMPLES
    self.gt_coord_thresh = mesh_cfg.GT_COORD_THRESH
    self.ico_sphere_level = mesh_cfg.ICO_SPHERE_LEVEL

    # Only the first feature's channel count is used to size the head.
    channels = [self.feature_channels[name] for name in self.in_features][0]

    self.mesh_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    pooled_shape = ShapeSpec(channels=channels, height=resolution, width=resolution)
    self.mesh_head = build_mesh_head(cfg, pooled_shape)
def _init_box_head(cls, cfg, input_shape):
    """Build the rotated box pooler, box head and rotated box predictor.

    Args:
        cfg: config node; ``MODEL.ROI_HEADS.IN_FEATURES`` and
            ``MODEL.ROI_BOX_HEAD`` are read here. The pooler type must be
            ``"ROIAlignRotated"``.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict: ``box_in_features``, ``box_pooler``, ``box_head`` and
        ``box_predictor``.
    """
    feature_names = cfg.MODEL.ROI_HEADS.IN_FEATURES
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in feature_names)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    kind = box_cfg.POOLER_TYPE

    # Only the rotated pooler is accepted here.
    assert kind in ("ROIAlignRotated",), kind

    # Channel counts are assumed equal; the first one is used.
    channels = [input_shape[name].channels for name in feature_names][0]

    pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    head = build_box_head(
        cfg, ShapeSpec(channels=channels, height=resolution, width=resolution)
    )
    # The rotated predictor is the only difference from StandardROIHeads.
    predictor = RotatedFastRCNNOutputLayers(cfg, head.output_shape)

    return {
        "box_in_features": feature_names,
        "box_pooler": pooler,
        "box_head": head,
        "box_predictor": predictor,
    }
def _init_voxel_head(self, cfg):
    """Set up the voxel pooler, head and loss settings when ``cfg.MODEL.VOXEL_ON`` is set.

    The flag is always stored in ``self.voxel_on``; nothing else is created
    when it is false.

    Args:
        cfg: config node; ``MODEL.VOXEL_ON`` and ``MODEL.ROI_VOXEL_HEAD``
            are read here.
    """
    self.voxel_on = cfg.MODEL.VOXEL_ON
    if not self.voxel_on:
        return

    voxel_cfg = cfg.MODEL.ROI_VOXEL_HEAD
    resolution = voxel_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / self.feature_strides[name] for name in self.in_features)
    ratio = voxel_cfg.POOLER_SAMPLING_RATIO
    kind = voxel_cfg.POOLER_TYPE

    self.voxel_loss_weight = voxel_cfg.LOSS_WEIGHT
    self.cls_agnostic_voxel = voxel_cfg.CLS_AGNOSTIC_VOXEL
    self.cubify_thresh = voxel_cfg.CUBIFY_THRESH

    # Only the first feature's channel count is used to size the head.
    channels = [self.feature_channels[name] for name in self.in_features][0]

    self.voxel_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    self.voxel_head = build_voxel_head(
        cfg, ShapeSpec(channels=channels, width=resolution, height=resolution)
    )
def _init_plane_head(self, cfg, input_shape):
    """Set up the plane pooler and head when ``cfg.MODEL.PLANE_ON`` is set.

    The flag is always stored in ``self.plane_on``; nothing else is created
    when it is false.

    Args:
        cfg: config node; ``MODEL.PLANE_ON`` and ``MODEL.ROI_PLANE_HEAD``
            are read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.
    """
    self.plane_on = cfg.MODEL.PLANE_ON
    if not self.plane_on:
        return

    plane_cfg = cfg.MODEL.ROI_PLANE_HEAD
    resolution = plane_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in self.in_features)
    ratio = plane_cfg.POOLER_SAMPLING_RATIO
    kind = plane_cfg.POOLER_TYPE

    # Only the first feature's channel count is used to size the head.
    channels = [input_shape[name].channels for name in self.in_features][0]

    self.plane_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    self.plane_head = build_plane_head(
        cfg, ShapeSpec(channels=channels, width=resolution, height=resolution)
    )
def _init_densepose_head(self, cfg, input_shape):
    """Set up the DensePose branch when ``cfg.MODEL.DENSEPOSE_ON`` is set.

    The flag is always stored in ``self.densepose_on``; nothing else is
    created when it is false. Otherwise this builds the data filter, an
    optional decoder, the pooler, the head, the predictor and the losses.

    Args:
        cfg: config node; ``MODEL.DENSEPOSE_ON`` and
            ``MODEL.ROI_DENSEPOSE_HEAD`` are read here.
        input_shape: passed on to ``Decoder`` when the decoder is enabled.
    """
    self.densepose_on = cfg.MODEL.DENSEPOSE_ON
    if not self.densepose_on:
        return

    self.densepose_data_filter = build_densepose_data_filter(cfg)

    dp_cfg = cfg.MODEL.ROI_DENSEPOSE_HEAD
    resolution = dp_cfg.POOLER_RESOLUTION
    ratio = dp_cfg.POOLER_SAMPLING_RATIO
    kind = dp_cfg.POOLER_TYPE
    self.use_decoder = dp_cfg.DECODER_ON

    # With the decoder on, pooling uses only the first feature's scale;
    # otherwise one scale per input feature.
    pooled_features = self.in_features[:1] if self.use_decoder else self.in_features
    scales = tuple(1.0 / self.feature_strides[name] for name in pooled_features)

    # Only the first feature's channel count is used to size the head.
    channels = [self.feature_channels[name] for name in self.in_features][0]

    if self.use_decoder:
        self.decoder = Decoder(cfg, input_shape, self.in_features)

    self.densepose_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )
    self.densepose_head = build_densepose_head(cfg, channels)
    self.densepose_predictor = build_densepose_predictor(
        cfg, self.densepose_head.n_out_channels
    )
    self.densepose_losses = build_densepose_losses(cfg)
def _init_box_head(self, cfg):
    """Create the box pooler, box head and box predictor.

    Args:
        cfg: config node; ``MODEL.ROI_BOX_HEAD`` is read here.
    """
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / self.feature_strides[name] for name in self.in_features)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    kind = box_cfg.POOLER_TYPE

    # The predictors are shared across all input feature maps (as in FPN),
    # so their channel counts have to agree.
    channel_counts = [self.feature_channels[name] for name in self.in_features]
    assert len(set(channel_counts)) == 1, channel_counts
    channels = channel_counts[0]

    self.box_pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    # "Box head" and "box predictor" are separate modules for historical
    # reasons even though they are always used together.
    pooled_shape = ShapeSpec(channels=channels, height=resolution, width=resolution)
    self.box_head = build_box_head(cfg, pooled_shape)
    self.box_predictor = FastRCNNOutputLayers(
        self.box_head.output_size, self.num_classes, self.cls_agnostic_bbox_reg
    )
def __init__(self, cfg, input_shape):
    """Build the single-feature pooler, res5 block and FSOD box predictor.

    Args:
        cfg: config node; ``MODEL.ROI_HEADS``, ``MODEL.ROI_BOX_HEAD``,
            ``MODEL.MASK_ON`` and ``MODEL.KEYPOINT_ON`` are read here.
            Keypoints must be disabled.
        input_shape: mapping from feature name to an object exposing
            ``stride``.
    """
    super().__init__(cfg)

    self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    kind = box_cfg.POOLER_TYPE
    scales = (1.0 / input_shape[self.in_features[0]].stride,)
    ratio = box_cfg.POOLER_SAMPLING_RATIO
    self.mask_on = cfg.MODEL.MASK_ON

    # This head does not support keypoints and pools from exactly one feature map.
    assert not cfg.MODEL.KEYPOINT_ON
    assert len(self.in_features) == 1

    self.pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    # The predictor takes a 1x1 input with the res5 block's channel count.
    self.res5, res5_channels = self._build_res5_block(cfg)
    predictor_input = ShapeSpec(channels=res5_channels, height=1, width=1)
    self.box_predictor = FsodFastRCNNOutputLayers(cfg, predictor_input)
def _init_mask_head(cls, cfg, input_shape):
    """Build the mask pooler, mask head and optional discriminator.

    Args:
        cfg: config node; ``MODEL.MASK_ON``, ``MODEL.ROI_HEADS``,
            ``MODEL.ROI_MASK_HEAD`` and ``MODEL.WITH_GAN`` are read here.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.

    Returns:
        dict: empty when ``cfg.MODEL.MASK_ON`` is false, otherwise
        ``mask_in_features``, ``mask_pooler``, ``mask_head`` and
        ``discriminator`` (``None`` unless ``cfg.MODEL.WITH_GAN`` is set).
    """
    if not cfg.MODEL.MASK_ON:
        return {}

    feature_names = cfg.MODEL.ROI_HEADS.IN_FEATURES
    mask_cfg = cfg.MODEL.ROI_MASK_HEAD
    resolution = mask_cfg.POOLER_RESOLUTION
    scales = tuple(1.0 / input_shape[name].stride for name in feature_names)
    ratio = mask_cfg.POOLER_SAMPLING_RATIO
    kind = mask_cfg.POOLER_TYPE

    # Only the first feature's channel count is used to size the head.
    channels = [input_shape[name].channels for name in feature_names][0]
    pooled_shape = ShapeSpec(channels=channels, width=resolution, height=resolution)

    return {
        "mask_in_features": feature_names,
        "mask_pooler": ROIPooler(
            output_size=resolution,
            scales=scales,
            sampling_ratio=ratio,
            pooler_type=kind,
        ),
        "mask_head": build_mask_head(cfg, pooled_shape),
        # A discriminator is only created for GAN training.
        "discriminator": Discriminator() if cfg.MODEL.WITH_GAN else None,
    }