def test_scriptability(self):
    cfg = RCNNConfig()
    anchor_matcher = Matcher(
        cfg.MODEL.RPN.IOU_THRESHOLDS,
        cfg.MODEL.RPN.IOU_LABELS,
        allow_low_quality_matches=True)
    match_quality_matrix = torch.tensor(
        [[0.15, 0.45, 0.2, 0.6],
         [0.3, 0.65, 0.05, 0.1],
         [0.05, 0.4, 0.25, 0.4]])
    expected_matches = torch.tensor([1, 1, 2, 0])
    expected_match_labels = torch.tensor([-1, 1, 0, 1], dtype=torch.int8)

    matches, match_labels = anchor_matcher(match_quality_matrix)
    self.assertTrue(torch.allclose(matches, expected_matches))
    self.assertTrue(torch.allclose(match_labels, expected_match_labels))

    # nonzero_tuple must be imported explicitly so that TorchScript knows
    # what it is: https://github.com/pytorch/pytorch/issues/38964
    from detectron2.layers import nonzero_tuple  # noqa F401

    def f(thresholds: List[float], labels: List[int]):
        return Matcher(thresholds, labels, allow_low_quality_matches=True)

    scripted_anchor_matcher = torch.jit.script(f)(
        cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS)
    matches, match_labels = scripted_anchor_matcher(match_quality_matrix)
    self.assertTrue(torch.allclose(matches, expected_matches))
    self.assertTrue(torch.allclose(match_labels, expected_match_labels))
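# For reference, a minimal sketch of the matching rule the test above
# exercises (an illustration only, not detectron2's actual implementation):
# each column (anchor) is assigned its best-IoU row (gt), then labeled by
# thresholding that best IoU; with allow_low_quality_matches=True, every
# gt's highest-IoU anchor is force-labeled positive even below threshold.
def _reference_match(quality, thresholds, labels, allow_low_quality=True):
    vals, matches = quality.max(dim=0)  # best gt index per anchor
    match_labels = torch.empty_like(matches, dtype=torch.int8)
    # label each anchor by the interval its best IoU falls into,
    # e.g. thresholds [0.3, 0.7] / labels [0, -1, 1] for the RPN defaults
    for lo, hi, lbl in zip(
            [0.0] + thresholds, thresholds + [float("inf")], labels):
        match_labels[(vals >= lo) & (vals < hi)] = lbl
    if allow_low_quality:
        # anchors that are the argmax for some gt (including ties)
        # become positive regardless of their IoU value
        best_per_gt = quality.max(dim=1).values
        match_labels[(quality == best_per_gt[:, None]).any(dim=0)] = 1
    return matches, match_labels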
def __init__(self, cfg):
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)

    # fmt: off
    self.image_size = cfg.MODEL.SSD.IMAGE_SIZE
    self.num_classes = cfg.MODEL.SSD.NUM_CLASSES
    self.in_features = cfg.MODEL.SSD.IN_FEATURES
    self.extra_layer_arch = cfg.MODEL.SSD.EXTRA_LAYER_ARCH[str(self.image_size)]
    self.l2norm_scale = cfg.MODEL.SSD.L2NORM_SCALE
    # Loss parameters:
    self.loss_alpha = cfg.MODEL.SSD.LOSS_ALPHA
    self.smooth_l1_loss_beta = cfg.MODEL.SSD.SMOOTH_L1_LOSS_BETA
    self.negative_positive_ratio = cfg.MODEL.SSD.NEGATIVE_POSITIVE_RATIO
    # Inference parameters:
    self.score_threshold = cfg.MODEL.SSD.SCORE_THRESH_TEST
    self.nms_threshold = cfg.MODEL.SSD.NMS_THRESH_TEST
    self.nms_type = cfg.MODEL.NMS_TYPE
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    # fmt: on

    self.backbone = cfg.build_backbone(
        cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in self.in_features]

    # build extra layers
    self.extra_layers = self._make_extra_layers(
        feature_shapes[-1].channels, self.extra_layer_arch)
    extra_layer_channels = [c for c in self.extra_layer_arch if isinstance(c, int)]
    feature_shapes += [ShapeSpec(channels=c) for c in extra_layer_channels[1::2]]

    # ssd head
    self.head = SSDHead(cfg, feature_shapes)
    self.l2norm = L2Norm(512, self.l2norm_scale)
    self.default_box_generator = cfg.build_default_box_generator(cfg)
    self.default_boxes = self.default_box_generator()

    # Matching and loss
    self.box2box_transform = Box2BoxTransform(
        weights=cfg.MODEL.SSD.BBOX_REG_WEIGHTS)
    self.matcher = Matcher(
        cfg.MODEL.SSD.IOU_THRESHOLDS,
        cfg.MODEL.SSD.IOU_LABELS,
        allow_low_quality_matches=False,
    )

    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)

    # Initialization
    self._init_weights()
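# L2Norm is referenced above but defined elsewhere. A typical implementation,
# following the SSD paper's ParseNet-style normalization of the conv4_3
# feature map, is sketched below for reference; the repository's own version
# may differ in details (name, init, eps).
import torch.nn.functional as F  # assumed available for the sketch


class L2NormSketch(nn.Module):
    def __init__(self, channels, scale):
        super().__init__()
        # one learnable scale per channel, initialized to a constant (e.g. 20)
        self.weight = nn.Parameter(torch.full((channels,), float(scale)))

    def forward(self, x):
        # normalize each spatial location's feature vector to unit L2 norm,
        # then rescale channel-wise with the learned weights
        x = F.normalize(x, p=2, dim=1, eps=1e-10)
        return self.weight.view(1, -1, 1, 1) * x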
def __init__(self, cfg):
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)

    self.num_classes = cfg.MODEL.EFFICIENTDET.NUM_CLASSES
    self.in_features = cfg.MODEL.EFFICIENTDET.IN_FEATURES
    self.freeze_bn = cfg.MODEL.EFFICIENTDET.FREEZE_BN
    self.freeze_backbone = cfg.MODEL.EFFICIENTDET.FREEZE_BACKBONE
    self.input_size = cfg.MODEL.BIFPN.INPUT_SIZE
    # Loss parameters:
    self.focal_loss_alpha = cfg.MODEL.EFFICIENTDET.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma = cfg.MODEL.EFFICIENTDET.FOCAL_LOSS_GAMMA
    self.smooth_l1_loss_beta = cfg.MODEL.EFFICIENTDET.SMOOTH_L1_LOSS_BETA
    self.box_loss_weight = cfg.MODEL.EFFICIENTDET.BOX_LOSS_WEIGHT
    self.regress_norm = cfg.MODEL.EFFICIENTDET.REG_NORM
    # Inference parameters:
    self.score_threshold = cfg.MODEL.EFFICIENTDET.SCORE_THRESH_TEST
    self.topk_candidates = cfg.MODEL.EFFICIENTDET.TOPK_CANDIDATES_TEST
    self.nms_threshold = cfg.MODEL.EFFICIENTDET.NMS_THRESH_TEST
    self.nms_type = cfg.MODEL.NMS_TYPE
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE

    self.backbone = cfg.build_backbone(
        cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.head = EfficientDetHead(cfg, feature_shapes)
    self.anchor_generator = cfg.build_anchor_generator(cfg, feature_shapes)

    # Matching and loss
    self.box2box_transform = Box2BoxTransform(
        weights=cfg.MODEL.EFFICIENTDET.BBOX_REG_WEIGHTS)
    self.matcher = Matcher(
        cfg.MODEL.EFFICIENTDET.IOU_THRESHOLDS,
        cfg.MODEL.EFFICIENTDET.IOU_LABELS,
        allow_low_quality_matches=False,
    )

    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    # inputs are scaled to [0, 1] before mean/std normalization
    self.normalizer = lambda x: (x / 255. - pixel_mean) / pixel_std

    if self.freeze_bn:
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()
    if self.freeze_backbone:
        for name, params in self.named_parameters():
            if name.startswith("backbone.bottom_up"):
                params.requires_grad = False

    self.to(self.device)
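# Sketch (not in the original): layer.eval() in __init__ only freezes
# BatchNorm until the next model.train() call, which flips every submodule
# back to training mode. If BN should stay frozen for the whole run, one
# common pattern is to override train() as below:
def train(self, mode=True):
    super().train(mode)
    if mode and self.freeze_bn:
        # re-freeze BN after the blanket train(mode) above re-enabled it
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()
    return self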
def __init__(self, cfg):
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)

    # fmt: off
    self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
    self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
    # Loss parameters:
    self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
    self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
    # Inference parameters:
    self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
    self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
    self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
    self.nms_type = cfg.MODEL.NMS_TYPE
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    # fmt: on

    self.backbone = cfg.build_backbone(
        cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.head = RetinaNetHead(cfg, feature_shapes)
    self.anchor_generator = cfg.build_anchor_generator(cfg, feature_shapes)

    # Matching and loss
    self.box2box_transform = Box2BoxTransform(
        weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS)
    self.matcher = Matcher(
        cfg.MODEL.RETINANET.IOU_THRESHOLDS,
        cfg.MODEL.RETINANET.IOU_LABELS,
        allow_low_quality_matches=True,
    )

    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)

    """
    In Detectron1, the loss is normalized by the number of foreground samples
    in the batch. When the batch size is 1 per GPU, #foreground has a large
    variance and using it directly leads to lower performance. Here we
    maintain an EMA of #foreground to stabilize the normalizer.
    """
    self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
    self.loss_normalizer_momentum = 0.9
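# Sketch (the actual update lives in the losses() method, not shown here):
# per the comment above, the EMA normalizer is typically blended with each
# batch's foreground count and then used as the loss denominator. The helper
# name below is hypothetical, for illustration only.
def _update_loss_normalizer(self, num_foreground):
    # blend the running estimate with this batch's #foreground
    m = self.loss_normalizer_momentum
    self.loss_normalizer = m * self.loss_normalizer + (1 - m) * num_foreground
    # losses are then divided by max(1, self.loss_normalizer)
    return max(1, self.loss_normalizer)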