def __init__(self, cfg):
    """Build a point-based RetinaNet variant on top of the base detector.

    Reads head/point settings from cfg, registers a uniform sampling grid
    buffer, and selects the init-box matcher based on
    cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE.
    """
    super().__init__(cfg)
    backbone_shape = self.backbone.output_shape()
    # NOTE(review): self.in_features is read here but only assigned further
    # below — this relies on super().__init__(cfg) already having set it.
    # Confirm against the base class; otherwise this line raises.
    feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.head = PointRetinaNetHead(cfg, feature_shapes)
    # Fixed 2048x2048 sampling grid, registered so it moves with the module.
    grid = uniform_grid(2048)
    self.register_buffer("grid", grid)
    self.num_points = cfg.MODEL.PROPOSAL_GENERATOR.NUM_POINTS
    self.point_strides = [8, 16, 32, 64, 128]
    self.loss_normalizer = 20  # initialize with any reasonable #fg that's not too small
    self.loss_normalizer_momentum = 0.9
    self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
    # NOTE(review): re-assigning in_features here is redundant if the base
    # class already set it from the same key — verify.
    self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
    # NOTE(review): second output_shape() call; same value as backbone_shape.
    input_shape = self.backbone.output_shape()
    self.strides = [input_shape[f].stride for f in self.in_features]
    # Assigning init box labels.
    if cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE == 'points':
        from slender_det.modeling.matchers.rep_matcher import rep_points_match_with_classes
        self.matcher = rep_points_match_with_classes
    elif cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE == 'nearest_points':
        from slender_det.modeling.matchers.rep_matcher import nearest_point_match
        self.matcher = nearest_point_match
    else:
        assert cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE == 'inside'
        from slender_det.modeling.matchers.rep_matcher import inside_match
        self.matcher = inside_match
    # Used for matching refine box labels.
    self.bbox_matcher = Matcher(
        cfg.MODEL.RETINANET.IOU_THRESHOLDS,
        cfg.MODEL.RETINANET.IOU_LABELS,
        allow_low_quality_matches=True,
    )
def setup(file):
    """Prepare cfg, a data-loader iterator, anchors, and two matchers.

    Loads the config from *file*, pre-fetches one training batch, runs the
    backbone once to generate anchors, and builds both the baseline Matcher
    and the TopKMatcher under test.
    """
    cfg = get_cfg()
    cfg.merge_from_file(file)
    cfg.SOLVER.IMS_PER_BATCH = 2

    # Data loader iterator; pull one batch so anchors match real inputs.
    loader_iter = iter(build_detection_train_loader(cfg))
    batch = next(loader_iter)

    # Run the backbone and generate anchors over its RPN feature maps.
    backbone = build_backbone(cfg).to(device)
    image_tensors = [sample["image"].to(device) for sample in batch]
    image_list = ImageList.from_tensors(image_tensors, backbone.size_divisibility)
    feature_maps = backbone(image_list.tensor.float())
    shapes = backbone.output_shape()
    rpn_features = cfg.MODEL.RPN.IN_FEATURES
    anchor_generator = build_anchor_generator(
        cfg, [shapes[f] for f in rpn_features])
    anchors = anchor_generator([feature_maps[f] for f in rpn_features])
    anchors = Boxes.cat(anchors).to(device)

    # Baseline matcher plus the top-k variant (k=9).
    raw_matcher = Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                          cfg.MODEL.RPN.IOU_LABELS,
                          allow_low_quality_matches=True)
    matcher = TopKMatcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                          cfg.MODEL.RPN.IOU_LABELS, 9)
    return cfg, loader_iter, anchors, matcher, raw_matcher
def test_scriptability(self):
    """Matcher must give identical results eagerly and under TorchScript."""
    cfg = get_cfg()
    eager_matcher = Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                            cfg.MODEL.RPN.IOU_LABELS,
                            allow_low_quality_matches=True)
    iou_matrix = torch.tensor([[0.15, 0.45, 0.2, 0.6],
                               [0.3, 0.65, 0.05, 0.1],
                               [0.05, 0.4, 0.25, 0.4]])
    want_matches = torch.tensor([1, 1, 2, 0])
    want_labels = torch.tensor([-1, 1, 0, 1], dtype=torch.int8)

    got_matches, got_labels = eager_matcher(iou_matrix)
    self.assertTrue(torch.allclose(got_matches, want_matches))
    self.assertTrue(torch.allclose(got_labels, want_labels))

    # nonzero_tuple must be imported explicitly so jit can resolve it.
    # https://github.com/pytorch/pytorch/issues/38964
    from detectron2.layers import nonzero_tuple  # noqa F401

    def f(thresholds: List[float], labels: List[int]):
        return Matcher(thresholds, labels, allow_low_quality_matches=True)

    scripted_matcher = torch.jit.script(f)(cfg.MODEL.RPN.IOU_THRESHOLDS,
                                           cfg.MODEL.RPN.IOU_LABELS)
    got_matches, got_labels = scripted_matcher(iou_matrix)
    self.assertTrue(torch.allclose(got_matches, want_matches))
    self.assertTrue(torch.allclose(got_labels, want_labels))
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """RPN using a BUA box transform; all settings come from cfg.MODEL.RPN."""
    super().__init__()
    rpn_cfg = cfg.MODEL.RPN
    # fmt: off
    self.min_box_side_len     = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
    self.in_features          = rpn_cfg.IN_FEATURES
    self.nms_thresh           = rpn_cfg.NMS_THRESH
    self.batch_size_per_image = rpn_cfg.BATCH_SIZE_PER_IMAGE
    self.positive_fraction    = rpn_cfg.POSITIVE_FRACTION
    self.smooth_l1_beta       = rpn_cfg.SMOOTH_L1_BETA
    self.loss_weight          = rpn_cfg.LOSS_WEIGHT
    # fmt: on
    # Keyed by self.training: True -> train settings, False -> test settings.
    self.pre_nms_topk = {
        True: rpn_cfg.PRE_NMS_TOPK_TRAIN,
        False: rpn_cfg.PRE_NMS_TOPK_TEST,
    }
    self.post_nms_topk = {
        True: rpn_cfg.POST_NMS_TOPK_TRAIN,
        False: rpn_cfg.POST_NMS_TOPK_TEST,
    }
    self.boundary_threshold = rpn_cfg.BOUNDARY_THRESH
    self.anchor_generator = build_anchor_generator(
        cfg, [input_shape[f] for f in self.in_features]
    )
    self.box2box_transform = BUABox2BoxTransform(weights=rpn_cfg.BBOX_REG_WEIGHTS)
    self.anchor_matcher = Matcher(
        rpn_cfg.IOU_THRESHOLDS, rpn_cfg.IOU_LABELS, allow_low_quality_matches=True
    )
    self.rpn_head = build_rpn_head(cfg, [input_shape[f] for f in self.in_features])
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """ROI heads base: sampling, test-time thresholds, matcher and box transform."""
    super(ROIHeads, self).__init__()
    roi_cfg = cfg.MODEL.ROI_HEADS
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    # fmt: off
    self.batch_size_per_image     = roi_cfg.BATCH_SIZE_PER_IMAGE
    self.positive_sample_fraction = roi_cfg.POSITIVE_FRACTION
    self.test_score_thresh        = roi_cfg.SCORE_THRESH_TEST
    self.test_nms_thresh          = roi_cfg.NMS_THRESH_TEST
    self.test_detections_per_img  = cfg.TEST.DETECTIONS_PER_IMAGE
    self.in_features              = roi_cfg.IN_FEATURES
    self.num_classes              = roi_cfg.NUM_CLASSES
    self.proposal_append_gt       = roi_cfg.PROPOSAL_APPEND_GT
    self.cls_agnostic_bbox_reg    = box_cfg.CLS_AGNOSTIC_BBOX_REG
    self.smooth_l1_beta           = box_cfg.SMOOTH_L1_BETA
    # fmt: on
    # Per-feature stride/channel lookup tables built from the backbone shapes.
    self.feature_strides = {name: shape.stride for name, shape in input_shape.items()}
    self.feature_channels = {name: shape.channels for name, shape in input_shape.items()}
    # Matcher to assign box proposals to gt boxes
    self.proposal_matcher = Matcher(
        roi_cfg.IOU_THRESHOLDS,
        roi_cfg.IOU_LABELS,
        allow_low_quality_matches=False,
    )
    # Box2BoxTransform for bounding box regression
    self.box2box_transform = Box2BoxTransform(weights=box_cfg.BBOX_REG_WEIGHTS)
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """HOI ROI heads: box + human-object-interaction branches and HO matrices."""
    super(StandardHOROIHeads, self).__init__()
    roi_cfg = cfg.MODEL.ROI_HEADS
    # fmt: off
    self.in_features                   = roi_cfg.IN_FEATURES
    self.proposal_append_gt            = roi_cfg.PROPOSAL_APPEND_GT
    self.num_classes                   = roi_cfg.NUM_CLASSES
    self.num_actions                   = roi_cfg.NUM_ACTIONS
    self.box_batch_size_per_image      = roi_cfg.BOX_BATCH_SIZE_PER_IMAGE
    self.hoi_batch_size_per_image      = roi_cfg.HOI_BATCH_SIZE_PER_IMAGE
    self.box_positive_sample_fraction  = roi_cfg.BOX_POSITIVE_FRACTION
    self.hoi_positive_sample_fraction  = roi_cfg.HOI_POSITIVE_FRACTION
    self.compose_learning              = roi_cfg.CL
    self.cl_weight                     = roi_cfg.CL_WEIGHT
    self.is_hoi_prediction             = roi_cfg.IS_HOI_PRED
    self.remove_obj_det                = roi_cfg.REMVOE_OBJ_ONLY_DET  # [sic] key typo is in the config schema
    self.obj_image_nums                = roi_cfg.OBJ_IMG_NUMS
    # fmt: on
    # Matcher to assign box proposals to gt boxes
    self.proposal_matcher = Matcher(
        roi_cfg.IOU_THRESHOLDS,
        roi_cfg.IOU_LABELS,
        allow_low_quality_matches=False,
    )
    self._init_box_head(cfg, input_shape)
    self._init_hoi_head(cfg, input_shape)
    # Verb/object -> HOI-class conversion matrices (81 object classes).
    verb_to_HO_matrix, obj_to_HO_matrix = get_convert_matrix(obj_class_num=81)
    self.verb_to_HO_matrix = torch.from_numpy(verb_to_HO_matrix)
    self.obj_to_HO_matrix = torch.from_numpy(obj_to_HO_matrix)
def from_config(cls, cfg, input_shape):
    """Assemble constructor kwargs from cfg, extending the base-class dict."""
    ret = super().from_config(cfg)
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    ret["train_on_pred_boxes"] = box_cfg.TRAIN_ON_PRED_BOXES
    ret["add_noise_to_proposals"] = box_cfg.ADD_NOISE_TO_PROPOSALS
    ret["encoder_feature"] = box_cfg.ENCODER_FEATURE
    ret["random_sample_size"] = box_cfg.RANDOM_SAMPLE_SIZE
    ret["random_sample_size_upper_bound"] = box_cfg.RANDOM_SAMPLE_SIZE_UPPER_BOUND
    ret["random_sample_size_lower_bound"] = box_cfg.RANDOM_SAMPLE_SIZE_LOWER_BOUND
    ret["random_proposal_drop"] = box_cfg.RANDOM_PROPOSAL_DROP
    ret["random_proposal_drop_upper_bound"] = box_cfg.RANDOM_PROPOSAL_DROP_UPPER_BOUND
    ret["random_proposal_drop_lower_bound"] = box_cfg.RANDOM_PROPOSAL_DROP_LOWER_BOUND
    ret["max_proposal_per_batch"] = box_cfg.MAX_PROPOSAL_PER_BATCH
    # Subclasses that have not been updated to from_config-style construction
    # may have overridden the _init_*_head helpers as plain functions.
    # inspect.ismethod is True only for bound methods of cls, so legacy
    # overrides are skipped here and must call their helpers themselves.
    for init_head in (cls._init_box_head, cls._init_mask_head, cls._init_keypoint_head):
        if inspect.ismethod(init_head):
            ret.update(init_head(cfg, input_shape))
    ret["proposal_matcher"] = Matcher(
        cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
        cfg.MODEL.ROI_HEADS.IOU_LABELS,
        allow_low_quality_matches=False,
    )
    return ret
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """ROI heads with an optional class filter read from the config."""
    super(ROIHeads, self).__init__()
    roi_cfg = cfg.MODEL.ROI_HEADS
    # fmt: off
    self.batch_size_per_image     = roi_cfg.BATCH_SIZE_PER_IMAGE
    self.positive_sample_fraction = roi_cfg.POSITIVE_FRACTION
    self.test_score_thresh        = roi_cfg.SCORE_THRESH_TEST
    self.test_nms_thresh          = roi_cfg.NMS_THRESH_TEST
    self.test_detections_per_img  = cfg.TEST.DETECTIONS_PER_IMAGE
    self.in_features              = roi_cfg.IN_FEATURES
    self.num_classes              = roi_cfg.NUM_CLASSES
    self.proposal_append_gt       = roi_cfg.PROPOSAL_APPEND_GT
    # fmt: on
    # Per-feature stride/channel lookup tables built from the backbone shapes.
    self.feature_strides = {name: shape.stride for name, shape in input_shape.items()}
    self.feature_channels = {name: shape.channels for name, shape in input_shape.items()}
    # Optional class filter; only present in some configs, so probe first.
    self.class_filter = []
    if 'CLASS_FILTER' in roi_cfg:
        self.class_filter = roi_cfg.CLASS_FILTER
    # Matcher to assign box proposals to gt boxes
    self.proposal_matcher = Matcher(
        roi_cfg.IOU_THRESHOLDS,
        roi_cfg.IOU_LABELS,
        allow_low_quality_matches=False,
    )
def __init__(self, cfg):
    """SSD detector: backbone + extra layers + head, default boxes, matcher."""
    super().__init__()
    ssd_cfg = cfg.MODEL.SSD
    self.image_size = ssd_cfg.IMAGE_SIZE
    self.num_classes = ssd_cfg.NUM_CLASSES
    self.in_features = ssd_cfg.IN_FEATURES
    # Extra-layer architecture is selected by the configured input size.
    self.extra_layer_arch = ssd_cfg.EXTRA_LAYER_ARCH["SIZE{}".format(self.image_size)]
    self.l2norm_scale = ssd_cfg.L2NORM_SCALE
    # Loss parameters:
    self.loss_alpha = ssd_cfg.LOSS_ALPHA
    self.smooth_l1_loss_beta = ssd_cfg.SMOOTH_L1_LOSS_BETA
    self.negative_positive_ratio = ssd_cfg.NEGATIVE_POSITIVE_RATIO
    # Inference parameters:
    self.score_threshold = ssd_cfg.SCORE_THRESH_TEST
    self.nms_threshold = ssd_cfg.NMS_THRESH_TEST
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    # Vis parameters
    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT

    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in self.in_features]

    # Extra layers extend the deepest backbone feature map.
    self.extra_layers = self._make_extra_layers(
        feature_shapes[-1].channels, self.extra_layer_arch)
    # Every second integer entry of the arch spec is an output channel count.
    extra_layer_channels = [c for c in self.extra_layer_arch if isinstance(c, int)]
    feature_shapes += [ShapeSpec(channels=c) for c in extra_layer_channels[1::2]]

    # Head
    self.head = SSDHead(cfg, feature_shapes)
    # L2 normalization is applied to the shallowest selected feature map.
    self.l2norm = L2Norm(backbone_shape[self.in_features[0]].channels,
                         self.l2norm_scale)
    self.default_box_generator = DefaultBox(cfg)
    self.default_boxes = self.default_box_generator()

    # Matching and loss
    self.box2box_transform = Box2BoxTransform(weights=ssd_cfg.BBOX_REG_WEIGHTS)
    self.matcher = Matcher(
        ssd_cfg.IOU_THRESHOLDS,
        ssd_cfg.IOU_LABELS,
        allow_low_quality_matches=False,
    )
    self.register_buffer("pixel_mean",
                         torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
    self.register_buffer("pixel_std",
                         torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
    # Initialization
    self._init_weights()
def build_roi_mask_head(cfg):
    """Construct a ROIMaskHead with its proposal matcher and mask resolution."""
    roi_cfg = cfg.MODEL.ROI_HEADS
    matcher = Matcher(
        roi_cfg.IOU_THRESHOLDS,
        roi_cfg.IOU_LABELS,
        allow_low_quality_matches=False,
    )
    resolution = (cfg.MODEL.ROI_MASK_HEAD.RESOLUTION_H,
                  cfg.MODEL.ROI_MASK_HEAD.RESOLUTION_W)
    return ROIMaskHead(cfg, matcher, resolution)
def __init__(self, cfg):
    """RetinaNet (pre-register_buffer style): builds backbone, head, anchors,
    matcher, and a device-bound pixel normalizer."""
    super(RetinaNet, self).__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    ret_cfg = cfg.MODEL.RETINANET
    # fmt: off
    self.num_classes              = ret_cfg.NUM_CLASSES
    self.in_features              = ret_cfg.IN_FEATURES
    # Loss parameters:
    self.focal_loss_alpha         = ret_cfg.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma         = ret_cfg.FOCAL_LOSS_GAMMA
    self.smooth_l1_loss_beta      = ret_cfg.SMOOTH_L1_LOSS_BETA
    # Inference parameters:
    self.score_threshold          = ret_cfg.SCORE_THRESH_TEST
    self.topk_candidates          = ret_cfg.TOPK_CANDIDATES_TEST
    self.nms_threshold            = ret_cfg.NMS_THRESH_TEST
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    # Vis parameters
    self.vis_period               = cfg.VIS_PERIOD
    self.input_format             = cfg.INPUT.FORMAT
    # fmt: on

    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.head = RetinaNetHead(cfg, feature_shapes)
    self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

    # Matching and loss
    self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
    self.matcher = Matcher(
        ret_cfg.IOU_THRESHOLDS,
        ret_cfg.IOU_LABELS,
        allow_low_quality_matches=True,
    )

    assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
    num_channels = len(cfg.MODEL.PIXEL_MEAN)
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(num_channels, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(num_channels, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)

    # In Detectron1, loss is normalized by number of foreground samples in the
    # batch. When batch size is 1 per GPU, #foreground has a large variance and
    # using it leads to lower performance. Here we maintain an EMA of
    # #foreground to stabilize the normalizer.
    self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
    self.loss_normalizer_momentum = 0.9
def __init__(self, cfg) -> None:
    """RetinaNet over an explicitly-built FPN backbone; fully type-annotated."""
    super().__init__()
    ret_cfg = cfg.MODEL.RETINANET
    self.num_classes: int = ret_cfg.NUM_CLASSES
    self.in_features: List[str] = ret_cfg.IN_FEATURES
    # Loss parameters:
    self.focal_loss_alpha: float = ret_cfg.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma: float = ret_cfg.FOCAL_LOSS_GAMMA
    self.smooth_l1_loss_beta: float = ret_cfg.SMOOTH_L1_LOSS_BETA
    # Inference parameters:
    self.score_threshold: float = ret_cfg.SCORE_THRESH_TEST
    self.topk_candidates: int = ret_cfg.TOPK_CANDIDATES_TEST
    self.nms_threshold: float = ret_cfg.NMS_THRESH_TEST
    self.max_detections_per_image: int = cfg.TEST.DETECTIONS_PER_IMAGE
    # Vis parameters
    self.vis_period: int = cfg.VIS_PERIOD
    self.input_format: str = cfg.INPUT.FORMAT

    # Input channel count is inferred from the pixel-mean length.
    self.fpn: FPN = build_fpn_backbone(
        cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
    fpn_shapes: Dict[str, ShapeSpec] = self.fpn.output_shape()
    feature_shapes: List[ShapeSpec] = [fpn_shapes[f] for f in self.in_features]
    self.head: RetinaNetHead = RetinaNetHead(cfg, feature_shapes)
    self.anchor_generator: nn.Module = build_anchor_generator(cfg, feature_shapes)

    # Matching and loss
    self.box2box_transform: Box2BoxTransform = Box2BoxTransform(
        weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
    self.anchor_matcher: Matcher = Matcher(
        thresholds=ret_cfg.IOU_THRESHOLDS,
        labels=ret_cfg.IOU_LABELS,
        allow_low_quality_matches=True)

    self.register_buffer("pixel_mean",
                         torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
    self.register_buffer("pixel_std",
                         torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))

    # In Detectron1, loss is normalized by the number of foreground samples in
    # the batch. When batch size is 1 per GPU, #foreground has large variance
    # and using it leads to lower performance; keep an EMA of #foreground to
    # stabilize the normalizer.
    # Initialize with any reasonable #fg that's not too small.
    self.loss_normalizer: float = 100
    self.loss_normalizer_momentum: float = 0.9
def __init__(self, cfg):
    """RetinaFace: box + landmark regression on a RetinaNet-style backbone."""
    super().__init__()
    rf_cfg = cfg.MODEL.RETINAFACE
    self.num_classes = rf_cfg.NUM_CLASSES
    self.in_features = rf_cfg.IN_FEATURES
    # loss parameters
    self.focal_loss_alpha = rf_cfg.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma = rf_cfg.FOCAL_LOSS_GAMMA
    self.smooth_l1_loss_beta = rf_cfg.SMOOTH_L1_LOSS_BETA
    self.loc_weight = rf_cfg.LOC_WEIGHT
    # inference parameters
    self.score_threshold = rf_cfg.SCORE_THRESH_TEST
    self.topk_candidates = rf_cfg.TOPK_CANDIDATES_TEST
    self.nms_threshold = rf_cfg.NMS_THRESH_TEST
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    # visualize parameters
    self.vis_period = cfg.VIS_PERIOD
    self.input_format = cfg.INPUT.FORMAT

    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.head = RetinaFaceHead(cfg, feature_shapes)
    self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

    # Matching and loss: separate transforms for boxes and facial landmarks.
    self.box2box_transform = Box2BoxTransform(weights=rf_cfg.BBOX_REG_WEIGHTS)
    self.landmark2landmark_transform = Landmark2LandmarkTransform(
        weights=rf_cfg.LANDMARK_REG_WEIGHTS
    )
    self.matcher = Matcher(
        rf_cfg.IOU_THRESHOLDS,
        rf_cfg.IOU_LABELS,
        allow_low_quality_matches=True
    )
    self.register_buffer("pixel_mean",
                         torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
    self.register_buffer("pixel_std",
                         torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))

    # In Detectron1, loss is normalized by number of foreground samples in the
    # batch. When batch size is 1 per GPU, #foreground has a large variance and
    # using it leads to lower performance; maintain an EMA of #foreground to
    # stabilize the normalizer.
    # initialize with any reasonable #fg that's not too small
    self.loss_normalizer = 100
    self.loss_normalizer_momentum = 0.9
def __init__(self, cfg):
    """YOLACT: RetinaNet-style detection plus prototype masks, mask scoring,
    and an auxiliary semantic-segmentation head."""
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    ret_cfg = cfg.MODEL.RETINANET
    yolact_cfg = cfg.MODEL.YOLACT
    # fmt: off
    self.num_classes              = ret_cfg.NUM_CLASSES
    self.in_features              = ret_cfg.IN_FEATURES
    # Mask parameters:
    self.discard_mask_area        = yolact_cfg.DISCARD_MASK_AREA
    self.num_masks                = yolact_cfg.NUM_MASKS
    # Loss parameters:
    self.sem_seg_alpha            = yolact_cfg.SEM_SEG_ALPHA
    self.mask_alpha               = yolact_cfg.MASK_ALPHA
    self.mask_reweight            = yolact_cfg.MASK_REWEIGHT
    self.maskiou_alpha            = yolact_cfg.MASKIOU_ALPHA
    self.focal_loss_alpha         = ret_cfg.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma         = ret_cfg.FOCAL_LOSS_GAMMA
    self.smooth_l1_loss_beta      = ret_cfg.SMOOTH_L1_LOSS_BETA
    # Inference parameters:
    self.score_threshold          = ret_cfg.SCORE_THRESH_TEST
    self.topk_candidates          = ret_cfg.TOPK_CANDIDATES_TEST
    self.nms_threshold            = ret_cfg.NMS_THRESH_TEST
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    # fmt: on

    # retinanet_resnet_fpn_backbone
    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()  # dict[str -> ShapeSpec]
    feature_shapes = [backbone_shape[f] for f in self.in_features]
    # Base RetinaNet head extended with a mask-coefficient branch.
    self.head = YolactHead(cfg, feature_shapes)
    # Which backbone level feeds the protonet: see official yolact's
    # cfg.proto_src. Default there is `res2`, but here it is `res3`.
    self.protonet = ProtoNet(cfg, feature_shapes[0])
    # Mask scoring network.
    self.maskiou_net = MaskIouNet(cfg)
    # Semantic segmentation branch, used only to help training.
    self.semantic_seg_conv = nn.Conv2d(feature_shapes[0].channels,
                                       self.num_classes, 1)
    self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

    # Matching and loss
    self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
    self.matcher = Matcher(
        ret_cfg.IOU_THRESHOLDS,
        ret_cfg.IOU_LABELS,
        allow_low_quality_matches=True,
    )
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def from_config(cls, cfg):
    """Build constructor kwargs for the ROI heads from cfg.MODEL.ROI_HEADS."""
    roi_cfg = cfg.MODEL.ROI_HEADS
    # Matcher to assign box proposals to gt boxes
    proposal_matcher = Matcher(
        roi_cfg.IOU_THRESHOLDS,
        roi_cfg.IOU_LABELS,
        allow_low_quality_matches=False,
    )
    return {
        "batch_size_per_image": roi_cfg.BATCH_SIZE_PER_IMAGE,
        "positive_fraction": roi_cfg.POSITIVE_FRACTION,
        "num_classes": roi_cfg.NUM_CLASSES,
        "proposal_append_gt": roi_cfg.PROPOSAL_APPEND_GT,
        "proposal_matcher": proposal_matcher,
    }
def _init_box_head(cls, cfg, input_shape):
    """Build the cascade of rotated box heads, predictors, and per-stage matchers."""
    # fmt: off
    in_features              = cfg.MODEL.ROI_HEADS.IN_FEATURES
    pooler_resolution        = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pooler_scales            = tuple(1.0 / input_shape[k].stride for k in in_features)
    sampling_ratio           = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler_type              = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
    cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS
    cascade_ious             = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS
    # fmt: on
    # One set of regression weights per cascade stage.
    assert len(cascade_bbox_reg_weights) == len(cascade_ious)
    assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \
        "CascadeROIHeads only support class-agnostic regression now!"
    # The first cascade stage must reuse the base proposal-matching IoU.
    assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0]

    # All selected feature maps must have the same channel count.
    in_channels = [input_shape[f].channels for f in in_features]
    assert len(set(in_channels)) == 1, in_channels
    in_channels = in_channels[0]

    box_pooler = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )
    pooled_shape = ShapeSpec(channels=in_channels,
                             width=pooler_resolution,
                             height=pooler_resolution)

    box_heads = []
    box_predictors = []
    proposal_matchers = []
    for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights):
        stage_head = build_box_head(cfg, pooled_shape)
        box_heads.append(stage_head)
        box_predictors.append(
            RotatedFastRCNNOutputLayers(
                cfg,
                stage_head.output_shape,
                box2box_transform=Box2BoxTransformRotated(weights=bbox_reg_weights),
            ))
        proposal_matchers.append(
            Matcher([match_iou], [0, 1], allow_low_quality_matches=False))
    return {
        "box_in_features": in_features,
        "box_pooler": box_pooler,
        "box_heads": box_heads,
        "box_predictors": box_predictors,
        "proposal_matchers": proposal_matchers,
    }
def __init__(self, cfg=None):
    """YOLOv3 detector: Darknet-style backbone, per-level heads, and the
    anchor/grid/stride generators used to decode predictions."""
    super(Yolov3, self).__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    yolo_cfg = cfg.MODEL.YOLOV3
    self.num_classes = yolo_cfg.NUM_CLASSES
    self.norm = yolo_cfg.NORM
    self.in_features = yolo_cfg.IN_FEATURES
    self.anchors = cfg.MODEL.ANCHOR_GENERATOR.SIZES
    # Inference parameters (shared with the RetinaNet config section):
    self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
    self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
    self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE

    self.backbone = build_backbone(cfg)
    # Three anchors are predicted per grid cell per level.
    self.head = Yolov3Head(
        in_features=self.in_features,
        in_channels=[self.backbone._out_feature_channels[f]
                     for f in self.in_features],
        out_channels=yolo_cfg.HEAD.OUT_CHANNELS,
        num_classes=self.num_classes,
        num_anchors_per_cell=3,
        norm=self.norm)

    backbone_shape = self.backbone.output_shape()
    self.feature_strides = [backbone_shape[f].stride for f in self.in_features]
    self.feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.anchor_generator = build_anchor_generator(cfg, self.feature_shapes)
    self.grid_generator = build_grid_generator(cfg, self.feature_shapes)
    self.stride_generator = build_stride_generator(cfg, self.feature_shapes)
    self.matcher = Matcher(
        cfg.MODEL.RETINANET.IOU_THRESHOLDS,
        cfg.MODEL.RETINANET.IOU_LABELS,
        allow_low_quality_matches=True,
    )
    # Inputs are only rescaled to [0, 1]; no mean/std normalization.
    self.normalizer = lambda x: x / 255.0
    self.to(self.device)
    self.get_conv_bn_modules()
    self.bce_loss = nn.BCELoss()
    self.sigmoid = nn.Sigmoid()
def __init__(self, cfg):
    """RepPoints-style detector on a RetinaNet backbone.

    Reads detection thresholds from cfg.MODEL.RETINANET, point-sampling
    settings from cfg.MODEL.PROPOSAL_GENERATOR, and selects the init-box
    matcher according to SAMPLE_MODE ('points', 'nearest_points' or 'inside').
    """
    super().__init__()
    self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
    self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
    self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
    self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
    self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
    self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    self.num_points = cfg.MODEL.PROPOSAL_GENERATOR.NUM_POINTS

    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.head = ReppointsRetinaNetHead(cfg, feature_shapes)
    # Fixed 2048x2048 sampling grid, registered so it moves with the module.
    grid = uniform_grid(2048)
    self.register_buffer("grid", grid)
    self.point_strides = [8, 16, 32, 64, 128]
    # EMA of #foreground used to normalize the loss;
    # initialize with any reasonable #fg that's not too small.
    self.loss_normalizer = 20
    self.loss_normalizer_momentum = 0.9
    # Reuse backbone_shape instead of calling output_shape() a second time.
    self.strides = [backbone_shape[f].stride for f in self.in_features]
    self.register_buffer("pixel_mean",
                         torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
    self.register_buffer("pixel_std",
                         torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
    self.vis_period = 1024

    # Assigning init box labels; hoist the repeated config lookup.
    sample_mode = cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE
    if sample_mode == 'points':
        from slender_det.modeling.matchers.rep_matcher import rep_points_match
        self.matcher = rep_points_match
    elif sample_mode == 'nearest_points':
        from slender_det.modeling.matchers.rep_matcher import nearest_point_match
        self.matcher = nearest_point_match
    else:
        assert sample_mode == 'inside'
        from slender_det.modeling.matchers.rep_matcher import inside_match
        self.matcher = inside_match
    # Used for matching refine box labels.
    self.bbox_matcher = Matcher(
        cfg.MODEL.RETINANET.IOU_THRESHOLDS,
        cfg.MODEL.RETINANET.IOU_LABELS,
        allow_low_quality_matches=True,
    )
def build_matcher(cfg):
    """Build the RPN matcher selected by cfg.MODEL.RPN.MATCHER.TYPE.

    Returns:
        Matcher for "Origin", TopKMatcher for "TopK".

    Raises:
        AssertionError: if the configured type is not in MATCHER_TYPES.
        ValueError: defensive fallback for an unhandled known type.
    """
    # Renamed from `type` to avoid shadowing the builtin.
    matcher_type = cfg.MODEL.RPN.MATCHER.TYPE
    assert matcher_type in MATCHER_TYPES, "Matcher Type doesn't exist!" \
                                          "Expected one in {}," \
                                          "But got {}".format(MATCHER_TYPES, matcher_type)
    if matcher_type == "Origin":
        return Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                       cfg.MODEL.RPN.IOU_LABELS,
                       allow_low_quality_matches=True)
    elif matcher_type == "TopK":
        return TopKMatcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                           cfg.MODEL.RPN.IOU_LABELS,
                           cfg.MODEL.RPN.MATCHER.TOPK)
    else:
        raise ValueError("Unknown Matcher type: {}".format(matcher_type))
def __init__(self, dataset_name, cfg, distributed, output_dir=None):
    """
    Args:
        dataset_name (str): name of the dataset to be evaluated. It must
            either carry the "json_file" metadata (path to the COCO format
            annotation) or be in detectron2's standard dataset format so it
            can be converted to COCO format automatically.
        cfg (CfgNode): config instance.
        distributed (True): if True, collect results from all ranks for
            evaluation; otherwise evaluate results in the current process.
        output_dir (str): optional directory to dump all predicted results;
            the dump contains "instances_results.json" with the evaluation
            results.
    """
    self._predictions = []
    self._fiber_results = []
    self._results = None
    # Matcher to assign predictions to annotations
    self._bbox_matcher = Matcher(
        cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
        cfg.MODEL.ROI_HEADS.IOU_LABELS,
        allow_low_quality_matches=False,
    )
    self._tasks = ("fiberwidth", "fiberlength")
    self._modes = ("strict", "loose")
    self._distributed = distributed
    self._output_dir = output_dir
    self._cpu_device = torch.device("cpu")
    self._logger = logging.getLogger(__name__)
    self._metadata = MetadataCatalog.get(dataset_name)
    # Conversion from standard format is not supported here: require the
    # COCO-format json to be registered with the dataset.
    assert hasattr(
        self._metadata, "json_file"
    ), f"json_file was not found in MetaDataCatalog for '{dataset_name}'"
    self._get_annotations()
def __init__(self, cfg):
    """Rotated-box RetinaNet: standard RetinaNet wiring with a rotated
    box transform and a device-bound pixel normalizer."""
    super().__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    ret_cfg = cfg.MODEL.RETINANET
    # fmt: off
    self.num_classes              = ret_cfg.NUM_CLASSES
    self.in_features              = ret_cfg.IN_FEATURES
    # Loss parameters:
    self.focal_loss_alpha         = ret_cfg.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma         = ret_cfg.FOCAL_LOSS_GAMMA
    self.smooth_l1_loss_beta      = ret_cfg.SMOOTH_L1_LOSS_BETA
    # Inference parameters:
    self.score_threshold          = ret_cfg.SCORE_THRESH_TEST
    self.topk_candidates          = ret_cfg.TOPK_CANDIDATES_TEST
    self.nms_threshold            = ret_cfg.NMS_THRESH_TEST
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    # fmt: on

    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.head = RetinaNetHead(cfg, feature_shapes)
    self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

    # Matching and loss: rotated (5-dof) box deltas.
    self.box2box_transform = Box2BoxTransformRotated(
        weights=ret_cfg.BBOX_REG_WEIGHTS)
    self.matcher = Matcher(
        ret_cfg.IOU_THRESHOLDS,
        ret_cfg.IOU_LABELS,
        allow_low_quality_matches=True,
    )
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]):
    """Build RPN constructor kwargs from cfg and the backbone output shapes."""
    rpn_cfg = cfg.MODEL.RPN
    in_features = rpn_cfg.IN_FEATURES
    feature_shapes = [input_shape[f] for f in in_features]
    ret = {
        "in_features": in_features,
        "min_box_size": cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE,
        "nms_thresh": rpn_cfg.NMS_THRESH,
        "batch_size_per_image": rpn_cfg.BATCH_SIZE_PER_IMAGE,
        "positive_fraction": rpn_cfg.POSITIVE_FRACTION,
        # Localization loss is additionally scaled by the box-reg weight.
        "loss_weight": {
            "loss_rpn_cls": rpn_cfg.LOSS_WEIGHT,
            "loss_rpn_loc": rpn_cfg.BBOX_REG_LOSS_WEIGHT * rpn_cfg.LOSS_WEIGHT,
        },
        "anchor_boundary_thresh": rpn_cfg.BOUNDARY_THRESH,
        "box2box_transform": Box2BoxTransform(weights=rpn_cfg.BBOX_REG_WEIGHTS),
        "box_reg_loss_type": rpn_cfg.BBOX_REG_LOSS_TYPE,
        "smooth_l1_beta": rpn_cfg.SMOOTH_L1_BETA,
        # (train, test) pairs for NMS candidate limits.
        "pre_nms_topk": (rpn_cfg.PRE_NMS_TOPK_TRAIN, rpn_cfg.PRE_NMS_TOPK_TEST),
        "post_nms_topk": (rpn_cfg.POST_NMS_TOPK_TRAIN, rpn_cfg.POST_NMS_TOPK_TEST),
        "anchor_generator": build_anchor_generator(cfg, feature_shapes),
        "anchor_matcher": Matcher(rpn_cfg.IOU_THRESHOLDS,
                                  rpn_cfg.IOU_LABELS,
                                  allow_low_quality_matches=True),
        "head": build_rpn_head(cfg, feature_shapes),
    }
    return ret
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """Spatiotemporal RPN: standard RPN plus multi-frame aggregation settings."""
    super().__init__()
    rpn_cfg = cfg.MODEL.RPN
    st_cfg = cfg.MODEL.SPATIOTEMPORAL
    # fmt: off
    self.min_box_side_len     = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
    self.in_features          = rpn_cfg.IN_FEATURES
    self.nms_thresh           = rpn_cfg.NMS_THRESH
    self.batch_size_per_image = rpn_cfg.BATCH_SIZE_PER_IMAGE
    self.positive_fraction    = rpn_cfg.POSITIVE_FRACTION
    self.smooth_l1_beta       = rpn_cfg.SMOOTH_L1_BETA
    self.loss_weight          = rpn_cfg.LOSS_WEIGHT
    self.num_frames           = st_cfg.NUM_FRAMES
    self.temporal_dropout     = st_cfg.TEMPORAL_DROPOUT
    # fmt: on
    # Temporal dropout only makes sense when frames are aggregated forward.
    if self.temporal_dropout:
        assert st_cfg.FORWARD_AGGREGATION, "Temporal dropout without forward aggregation."
    if st_cfg.FORWARD_AGGREGATION:
        # (f_{t-NUM_FRAMES}, ..., f_{t-1}, f_t, f_{t+1}, ..., f_{t+NUM_FRAMES})
        self.num_frames = (2 * self.num_frames) + 1
    # Keyed by self.training: True -> train settings, False -> test settings.
    self.pre_nms_topk = {
        True: rpn_cfg.PRE_NMS_TOPK_TRAIN,
        False: rpn_cfg.PRE_NMS_TOPK_TEST,
    }
    self.post_nms_topk = {
        True: rpn_cfg.POST_NMS_TOPK_TRAIN,
        False: rpn_cfg.POST_NMS_TOPK_TEST,
    }
    self.boundary_threshold = rpn_cfg.BOUNDARY_THRESH
    self.anchor_generator = build_anchor_generator(
        cfg, [input_shape[f] for f in self.in_features])
    self.box2box_transform = Box2BoxTransform(weights=rpn_cfg.BBOX_REG_WEIGHTS)
    self.anchor_matcher = Matcher(rpn_cfg.IOU_THRESHOLDS,
                                  rpn_cfg.IOU_LABELS,
                                  allow_low_quality_matches=True)
    self.rpn_head = build_rpn_head(cfg, [input_shape[f] for f in self.in_features])
def from_config(cls, cfg):
    """Translate a yacs config into RetinaFace constructor arguments.

    Args:
        cfg: global project config node.

    Returns:
        dict of keyword arguments consumed by the model constructor.
    """
    retinanet = cfg.MODEL.RETINANET
    retinaface = cfg.MODEL.RETINAFACE
    backbone = build_backbone(cfg)
    backbone_shape = backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in retinanet.IN_FEATURES]
    # Metadata of the first train set, or a dummy when none is configured.
    metadata = MetadataCatalog.get(
        cfg.DATASETS.TRAIN[0] if len(cfg.DATASETS.TRAIN) else "__unused"
    )
    return {
        "backbone": backbone,
        "head": RetinaFaceHead(cfg, feature_shapes),
        "anchor_generator": build_anchor_generator(cfg, feature_shapes),
        "box2box_transform": Box2BoxTransform(weights=retinanet.BBOX_REG_WEIGHTS),
        "mark2mark_transform": Mark2MarkTransform(
            retinaface.NUM_LANDMARK,
            weights=retinaface.LANDMARK_REG_WEIGHTS,
        ),
        # Low-quality matches keep at least one positive anchor per GT box.
        "anchor_matcher": Matcher(
            retinanet.IOU_THRESHOLDS,
            retinanet.IOU_LABELS,
            allow_low_quality_matches=True,
        ),
        "pixel_mean": cfg.MODEL.PIXEL_MEAN,
        "pixel_std": cfg.MODEL.PIXEL_STD,
        "num_classes": retinanet.NUM_CLASSES,
        "num_landmark": retinaface.NUM_LANDMARK,
        "head_in_features": retinanet.IN_FEATURES,
        # Loss parameters:
        "focal_loss_alpha": retinanet.FOCAL_LOSS_ALPHA,
        "focal_loss_gamma": retinanet.FOCAL_LOSS_GAMMA,
        "smooth_l1_beta": retinanet.SMOOTH_L1_LOSS_BETA,
        "box_reg_loss_type": retinanet.BBOX_REG_LOSS_TYPE,
        "loc_weight": retinaface.LOC_WEIGHT,
        # Inference parameters:
        "test_score_thresh": retinanet.SCORE_THRESH_TEST,
        "test_topk_candidates": retinanet.TOPK_CANDIDATES_TEST,
        "test_nms_thresh": retinanet.NMS_THRESH_TEST,
        "max_detections_per_image": cfg.TEST.DETECTIONS_PER_IMAGE,
        # Vis parameters
        "vis_period": cfg.VIS_PERIOD,
        "input_format": cfg.INPUT.FORMAT,
        "visualizer": TrainingVisualizer(detector_postprocess, metadata),
    }
def __init__(self, cfg):
    """FCOS-style RepPoints detector: backbone, head, and loss/inference knobs.

    Args:
        cfg: global project config node.
    """
    super().__init__()
    fcos = cfg.MODEL.FCOS
    self.in_features = fcos.IN_FEATURES

    # Loss parameters:
    # Filled lazily by get_ground_truth().
    self.num_points_per_level = None
    self.fpn_strides = fcos.FPN_STRIDES
    self.center_sampling_radius = fcos.CENTER_SAMPLING_RADIUS
    self.norm_reg_targets = fcos.NORM_REG_TARGETS
    self.focal_loss_alpha = fcos.FOCAL_LOSS_ALPHA
    self.focal_loss_gamma = fcos.FOCAL_LOSS_GAMMA
    self.iou_loss_type = fcos.IOU_LOSS_TYPE

    # Inference parameters:
    self.score_thresh = 0.3
    self.pre_nms_thresh = fcos.INFERENCE_TH
    self.pre_nms_top_n = fcos.PRE_NMS_TOP_N
    self.nms_thresh = fcos.NMS_TH
    self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    self.min_size = 0
    self.num_classes = fcos.NUM_CLASSES

    # Backbone and per-level head.
    self.backbone = build_backbone(cfg)
    backbone_shape = self.backbone.output_shape()
    feature_shapes = [backbone_shape[f] for f in self.in_features]
    self.head = FCOSRepPointsHead(cfg, feature_shapes)

    # Normalization constants, registered so they move with the model.
    self.register_buffer(
        "pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)
    )
    self.register_buffer(
        "pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)
    )

    # Low-quality matches keep at least one positive anchor per GT box.
    self.bbox_matcher = Matcher(
        cfg.MODEL.RETINANET.IOU_THRESHOLDS,
        cfg.MODEL.RETINANET.IOU_LABELS,
        allow_low_quality_matches=True,
    )
def __init__(self, cfg, input_shape: List[ShapeSpec]):
    """Anchor-based head with an initial and a refined localization branch.

    Args:
        cfg: global project config node.
        input_shape: per-level feature ShapeSpecs this head consumes.
    """
    super().__init__(cfg, input_shape)
    head_params = cfg.MODEL.META_ARCH
    self.box_reg_loss_type = head_params.BBOX_REG_LOSS_TYPE

    self.anchor_generator = build_anchor_generator(cfg, input_shape)
    # All levels share the same per-cell anchor count.
    self.num_anchor = self.anchor_generator.num_cell_anchors[0]
    self.feat_adaptive = head_params.FEAT_ADAPTION

    # Initial bbox prediction branch.
    self.loc_init_conv = nn.Conv2d(
        self.feat_channels, self.loc_feat_channels, 3, 1, 1
    )
    self.loc_init_out = nn.Conv2d(self.loc_feat_channels, 4, 3, 1, 1)

    # Matching and loss.
    self.box2box_transform = Box2BoxTransform(weights=head_params.BBOX_REG_WEIGHTS)
    # Low-quality matches keep at least one positive anchor per GT box.
    self.anchor_matcher = Matcher(
        head_params.IOU_THRESHOLDS,
        head_params.IOU_LABELS,
        allow_low_quality_matches=True,
    )
    self.strides = [shape.stride for shape in input_shape]
    self.matcher = nearest_point_match

    # Feature-adaptive layers plus final classification/refine outputs.
    self.make_feature_adaptive_layers()
    self.cls_out = nn.Conv2d(
        self.feat_channels, self.num_anchor * self.num_classes, 3, 1, 1
    )
    self.loc_refine_out = nn.Conv2d(
        self.loc_feat_channels, self.num_anchor * 4, 3, 1, 1
    )
    self._init_weights()

    # initialize with any reasonable #fg that's not too small
    self.loss_normalizer = 100
    self.loss_normalizer_momentum = 0.9

    # Uniform point grid cached as a buffer so it follows the model's device.
    self.register_buffer("grid", uniform_grid(2048))
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """HOI ROI heads: sampling settings plus box/HOI sub-heads.

    Args:
        cfg: global project config node.
        input_shape: mapping from backbone feature name to its ShapeSpec.
    """
    super().__init__()
    roi = cfg.MODEL.ROI_HEADS
    # fmt: off
    self.in_features                    = roi.IN_FEATURES
    self.proposal_append_gt             = roi.PROPOSAL_APPEND_GT
    self.num_classes                    = roi.NUM_CLASSES
    self.num_actions                    = roi.NUM_ACTIONS
    self.box_batch_size_per_image       = roi.BOX_BATCH_SIZE_PER_IMAGE
    self.hoi_batch_size_per_image       = roi.HOI_BATCH_SIZE_PER_IMAGE
    self.box_positive_sample_fraction   = roi.BOX_POSITIVE_FRACTION
    self.hoi_positive_sample_fraction   = roi.HOI_POSITIVE_FRACTION
    # fmt: on

    # Matcher to assign box proposals to gt boxes.
    self.proposal_matcher = Matcher(
        roi.IOU_THRESHOLDS,
        roi.IOU_LABELS,
        allow_low_quality_matches=False,
    )

    self._init_box_head(cfg, input_shape)
    self._init_hoi_head(cfg, input_shape)
def __init__(
    self,
    images,
    locations,
    logits_pred,
    reg_pred,
    ctrness_pred,
    mask_regression,
    mask_encoding,
    focal_loss_alpha,
    focal_loss_gamma,
    iou_loss,
    center_sample,
    sizes_of_interest,
    strides,
    radius,
    num_classes,
    pre_nms_thresh,
    pre_nms_top_n,
    nms_thresh,
    fpn_post_nms_top_n,
    thresh_with_ctr,
    gt_instances=None,
    cfg=None,
):
    """Container bundling SMInst per-level predictions with loss/inference settings.

    Args:
        images: ImageList of the batch (provides image_sizes and count).
        locations / logits_pred / reg_pred / ctrness_pred: per-level outputs.
        mask_regression / mask_encoding: mask branch outputs and codec.
        gt_instances: ground truth, only present at training time.
        cfg: global project config node (SMInst options are read from it).
    """
    self.cfg = cfg

    # Raw network predictions, one entry per FPN level.
    self.logits_pred = logits_pred
    self.reg_pred = reg_pred
    self.ctrness_pred = ctrness_pred
    self.locations = locations
    self.mask_regression = mask_regression
    self.mask_encoding = mask_encoding
    self.gt_instances = gt_instances
    self.num_feature_maps = len(logits_pred)

    # Batch geometry.
    self.num_images = len(images)
    self.image_sizes = images.image_sizes

    # Loss settings.
    self.focal_loss_alpha = focal_loss_alpha
    self.focal_loss_gamma = focal_loss_gamma
    self.iou_loss = iou_loss
    self.center_sample = center_sample
    self.sizes_of_interest = sizes_of_interest
    self.strides = strides
    self.radius = radius
    self.num_classes = num_classes

    # Inference settings.
    self.pre_nms_thresh = pre_nms_thresh
    self.pre_nms_top_n = pre_nms_top_n
    self.nms_thresh = nms_thresh
    self.fpn_post_nms_top_n = fpn_post_nms_top_n
    self.thresh_with_ctr = thresh_with_ctr

    # SMInst mask-loss configuration.
    sminst = cfg.MODEL.SMInst
    self.loss_on_mask = sminst.LOSS_ON_MASK
    self.loss_on_code = sminst.LOSS_ON_CODE
    self.mask_loss_type = sminst.MASK_LOSS_TYPE
    self.num_codes = sminst.NUM_CODE
    self.mask_size = sminst.MASK_SIZE
    self.mask_sparse_weight = sminst.MASK_SPARSE_WEIGHT
    self.mask_loss_weight = sminst.MASK_LOSS_WEIGHT
    self.sparsity_loss_type = sminst.SPARSITY_LOSS_TYPE
    self.kl_rho = sminst.SPARSITY_KL_RHO

    # Matcher to assign box proposals to gt boxes.
    self.proposal_matcher = Matcher(
        sminst.IOU_THRESHOLDS,
        sminst.IOU_LABELS,
        allow_low_quality_matches=False,
    )
def __init__(self, cfg):
    """SparseRCNN: backbone, learned proposals, dynamic head, and set criterion.

    Args:
        cfg: global project config node.
    """
    super().__init__()
    sparse = cfg.MODEL.SparseRCNN
    roi = cfg.MODEL.ROI_HEADS

    self.device = torch.device(cfg.MODEL.DEVICE)
    self.in_features = roi.IN_FEATURES
    self.num_classes = sparse.NUM_CLASSES
    self.num_proposals = sparse.NUM_PROPOSALS
    self.hidden_dim = sparse.HIDDEN_DIM
    self.num_heads = sparse.NUM_HEADS

    # Build Backbone.
    self.backbone = build_backbone(cfg)
    self.size_divisibility = self.backbone.size_divisibility

    # Build Proposals: learned features plus boxes initialized to cover
    # the whole image (center 0.5, size 1.0 in normalized coordinates).
    self.init_proposal_features = nn.Embedding(self.num_proposals, self.hidden_dim)
    self.init_proposal_boxes = nn.Embedding(self.num_proposals, 4)
    nn.init.constant_(self.init_proposal_boxes.weight[:, :2], 0.5)
    nn.init.constant_(self.init_proposal_boxes.weight[:, 2:], 1.0)

    # Build Dynamic Head.
    self.head = DynamicHead(cfg=cfg, roi_input_shape=self.backbone.output_shape())

    # TODO #2 mask head
    self.mask_pooler, self.mask_head = self._init_mask_head(
        cfg, input_shape=self.backbone.output_shape()
    )
    self.proposal_append_gt = roi.PROPOSAL_APPEND_GT
    self.proposal_matcher = Matcher(
        roi.IOU_THRESHOLDS,
        roi.IOU_LABELS,
        allow_low_quality_matches=False,
    )

    # Loss parameters:
    class_weight = sparse.CLASS_WEIGHT
    giou_weight = sparse.GIOU_WEIGHT
    l1_weight = sparse.L1_WEIGHT
    mask_weight = sparse.MASK_WEIGHT
    no_object_weight = sparse.NO_OBJECT_WEIGHT
    self.deep_supervision = sparse.DEEP_SUPERVISION
    self.use_focal = sparse.USE_FOCAL

    # Build Criterion.
    matcher = HungarianMatcher(
        cfg=cfg,
        cost_class=class_weight,
        cost_bbox=l1_weight,
        cost_giou=giou_weight,
        use_focal=self.use_focal,
    )
    weight_dict = {
        "loss_ce": class_weight,
        "loss_bbox": l1_weight,
        "loss_giou": giou_weight,
        "loss_mask": mask_weight,
    }
    if self.deep_supervision:
        # Replicate every loss weight for each intermediate head output.
        aux_weight_dict = {}
        for i in range(self.num_heads - 1):
            aux_weight_dict.update({f"{k}_{i}": v for k, v in weight_dict.items()})
        weight_dict.update(aux_weight_dict)

    losses = ["labels", "boxes"]
    self.criterion = SetCriterion(
        cfg=cfg,
        num_classes=self.num_classes,
        matcher=matcher,
        weight_dict=weight_dict,
        eos_coef=no_object_weight,
        losses=losses,
        use_focal=self.use_focal,
    )

    # Per-channel normalization applied to input images.
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def f(thresholds: List[float], labels: List[int]):
    """Build a Matcher over the given IoU thresholds/labels, allowing low-quality matches."""
    return Matcher(
        thresholds,
        labels,
        allow_low_quality_matches=True,
    )