Example #1
    def __init__(self, cfg):
        super().__init__(cfg)
        # Read these config values first: they are used to index the backbone outputs below.
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = PointRetinaNetHead(cfg, feature_shapes)
        grid = uniform_grid(2048)
        self.register_buffer("grid", grid)
        self.num_points = cfg.MODEL.PROPOSAL_GENERATOR.NUM_POINTS
        self.point_strides = [8, 16, 32, 64, 128]
        self.loss_normalizer = 20  # initialize with any reasonable #fg that's not too small
        self.loss_normalizer_momentum = 0.9
        self.strides = [backbone_shape[f].stride for f in self.in_features]

        # Assigning init box labels.
        if cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE == 'points':
            from slender_det.modeling.matchers.rep_matcher import rep_points_match_with_classes
            self.matcher = rep_points_match_with_classes
        elif cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE == 'nearest_points':
            from slender_det.modeling.matchers.rep_matcher import nearest_point_match
            self.matcher = nearest_point_match
        else:
            assert cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE == 'inside'
            from slender_det.modeling.matchers.rep_matcher import inside_match
            self.matcher = inside_match

        # Used for matching refine box labels.
        self.bbox_matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )
Example #2
def setup(file):
    # get cfg
    cfg = get_cfg()
    cfg.merge_from_file(file)
    cfg.SOLVER.IMS_PER_BATCH = 2

    # get data loader iter
    data_loader = build_detection_train_loader(cfg)
    data_loader_iter = iter(data_loader)
    batched_inputs = next(data_loader_iter)

    # build anchors
    device = torch.device(cfg.MODEL.DEVICE)
    backbone = build_backbone(cfg).to(device)
    images = [x["image"].to(device) for x in batched_inputs]
    images = ImageList.from_tensors(images, backbone.size_divisibility)
    features = backbone(images.tensor.float())

    input_shape = backbone.output_shape()
    in_features = cfg.MODEL.RPN.IN_FEATURES
    anchor_generator = build_anchor_generator(
        cfg, [input_shape[f] for f in in_features])
    anchors = anchor_generator([features[f] for f in in_features])
    anchors = Boxes.cat(anchors).to(device)

    # build matcher
    raw_matcher = Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                          cfg.MODEL.RPN.IOU_LABELS,
                          allow_low_quality_matches=True)
    matcher = TopKMatcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                          cfg.MODEL.RPN.IOU_LABELS, 9)

    return cfg, data_loader_iter, anchors, matcher, raw_matcher
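A minimal usage sketch (the config path and the gt box below are hypothetical; pairwise_iou is detectron2's IoU helper):

from detectron2.structures import pairwise_iou

cfg, data_loader_iter, anchors, matcher, raw_matcher = setup("configs/rpn_R_50_FPN.yaml")
gt_boxes = Boxes(torch.tensor([[10.0, 10.0, 80.0, 80.0]], device=anchors.tensor.device))
match_quality_matrix = pairwise_iou(gt_boxes, anchors)  # (num_gt, num_anchors) IoU matrix
matches, match_labels = raw_matcher(match_quality_matrix)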
Example #3
    def test_scriptability(self):
        cfg = get_cfg()
        anchor_matcher = Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                                 cfg.MODEL.RPN.IOU_LABELS,
                                 allow_low_quality_matches=True)
        match_quality_matrix = torch.tensor([[0.15, 0.45, 0.2, 0.6],
                                             [0.3, 0.65, 0.05, 0.1],
                                             [0.05, 0.4, 0.25, 0.4]])
        expected_matches = torch.tensor([1, 1, 2, 0])
        expected_match_labels = torch.tensor([-1, 1, 0, 1], dtype=torch.int8)
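        # With detectron2's default RPN settings (thresholds [0.3, 0.7],
        # labels [0, -1, 1]), each anchor (column) takes its best-overlapping
        # gt (row): IoU < 0.3 -> background (0), [0.3, 0.7) -> ignore (-1),
        # >= 0.7 -> foreground (1). allow_low_quality_matches=True then
        # promotes, for each gt, the anchors achieving its highest IoU
        # (columns 1 and 3 here) to foreground, giving the labels above.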

        matches, match_labels = anchor_matcher(match_quality_matrix)
        self.assertTrue(torch.allclose(matches, expected_matches))
        self.assertTrue(torch.allclose(match_labels, expected_match_labels))

        # nonzero_tuple must be imported explicitly to let jit know what it is.
        # https://github.com/pytorch/pytorch/issues/38964
        from detectron2.layers import nonzero_tuple  # noqa F401

        def f(thresholds: List[float], labels: List[int]):
            return Matcher(thresholds, labels, allow_low_quality_matches=True)

        scripted_anchor_matcher = torch.jit.script(f)(
            cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS)
        matches, match_labels = scripted_anchor_matcher(match_quality_matrix)
        self.assertTrue(torch.allclose(matches, expected_matches))
        self.assertTrue(torch.allclose(match_labels, expected_match_labels))
Example #4
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super().__init__()

        # fmt: off
        self.min_box_side_len        = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
        self.in_features             = cfg.MODEL.RPN.IN_FEATURES
        self.nms_thresh              = cfg.MODEL.RPN.NMS_THRESH
        self.batch_size_per_image    = cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE
        self.positive_fraction       = cfg.MODEL.RPN.POSITIVE_FRACTION
        self.smooth_l1_beta          = cfg.MODEL.RPN.SMOOTH_L1_BETA
        self.loss_weight             = cfg.MODEL.RPN.LOSS_WEIGHT
        # fmt: on

        # Map from self.training state to train/test settings
        self.pre_nms_topk = {
            True: cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN,
            False: cfg.MODEL.RPN.PRE_NMS_TOPK_TEST,
        }
        self.post_nms_topk = {
            True: cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN,
            False: cfg.MODEL.RPN.POST_NMS_TOPK_TEST,
        }
        self.boundary_threshold = cfg.MODEL.RPN.BOUNDARY_THRESH

        self.anchor_generator = build_anchor_generator(
            cfg, [input_shape[f] for f in self.in_features]
        )
        self.box2box_transform = BUABox2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.anchor_matcher = Matcher(
            cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True
        )
        self.rpn_head = build_rpn_head(cfg, [input_shape[f] for f in self.in_features])
Example #5
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super(ROIHeads, self).__init__()

        # fmt: off
        self.batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
        self.positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
        self.test_score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST
        self.test_nms_thresh = cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST
        self.test_detections_per_img = cfg.TEST.DETECTIONS_PER_IMAGE
        self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        self.proposal_append_gt = cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT
        self.feature_strides = {k: v.stride for k, v in input_shape.items()}
        self.feature_channels = {k: v.channels for k, v in input_shape.items()}
        self.cls_agnostic_bbox_reg = cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG
        self.smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA
        # fmt: on

        # Matcher to assign box proposals to gt boxes
        self.proposal_matcher = Matcher(
            cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
            cfg.MODEL.ROI_HEADS.IOU_LABELS,
            allow_low_quality_matches=False,
        )

        # Box2BoxTransform for bounding box regression
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)
Example #6
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super(StandardHOROIHeads, self).__init__()
        # fmt: off
        self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        self.proposal_append_gt = cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT
        self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        self.num_actions = cfg.MODEL.ROI_HEADS.NUM_ACTIONS
        self.box_batch_size_per_image = cfg.MODEL.ROI_HEADS.BOX_BATCH_SIZE_PER_IMAGE
        self.hoi_batch_size_per_image = cfg.MODEL.ROI_HEADS.HOI_BATCH_SIZE_PER_IMAGE
        self.box_positive_sample_fraction = cfg.MODEL.ROI_HEADS.BOX_POSITIVE_FRACTION
        self.hoi_positive_sample_fraction = cfg.MODEL.ROI_HEADS.HOI_POSITIVE_FRACTION
        self.compose_learning = cfg.MODEL.ROI_HEADS.CL
        self.cl_weight = cfg.MODEL.ROI_HEADS.CL_WEIGHT
        self.is_hoi_prediction = cfg.MODEL.ROI_HEADS.IS_HOI_PRED
        self.remove_obj_det = cfg.MODEL.ROI_HEADS.REMVOE_OBJ_ONLY_DET
        self.obj_image_nums = cfg.MODEL.ROI_HEADS.OBJ_IMG_NUMS
        # fmt: on
        # Matcher to assign box proposals to gt boxes
        self.proposal_matcher = Matcher(
            cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
            cfg.MODEL.ROI_HEADS.IOU_LABELS,
            allow_low_quality_matches=False,
        )

        self._init_box_head(cfg, input_shape)
        self._init_hoi_head(cfg, input_shape)

        verb_to_HO_matrix, obj_to_HO_matrix = get_convert_matrix(
            obj_class_num=81)
        self.verb_to_HO_matrix = torch.from_numpy(verb_to_HO_matrix)
        self.obj_to_HO_matrix = torch.from_numpy(obj_to_HO_matrix)
Example #7
 def from_config(cls, cfg, input_shape):
     ret = super().from_config(cfg)
     ret["train_on_pred_boxes"] = cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES
     ret["add_noise_to_proposals"] = cfg.MODEL.ROI_BOX_HEAD.ADD_NOISE_TO_PROPOSALS
     ret["encoder_feature"] = cfg.MODEL.ROI_BOX_HEAD.ENCODER_FEATURE
     ret["random_sample_size"] = cfg.MODEL.ROI_BOX_HEAD.RANDOM_SAMPLE_SIZE
     ret["random_sample_size_upper_bound"] = cfg.MODEL.ROI_BOX_HEAD.RANDOM_SAMPLE_SIZE_UPPER_BOUND
     ret["random_sample_size_lower_bound"] = cfg.MODEL.ROI_BOX_HEAD.RANDOM_SAMPLE_SIZE_LOWER_BOUND
     ret["random_proposal_drop"] = cfg.MODEL.ROI_BOX_HEAD.RANDOM_PROPOSAL_DROP
     ret["random_proposal_drop_upper_bound"] = cfg.MODEL.ROI_BOX_HEAD.RANDOM_PROPOSAL_DROP_UPPER_BOUND
     ret["random_proposal_drop_lower_bound"] = cfg.MODEL.ROI_BOX_HEAD.RANDOM_PROPOSAL_DROP_LOWER_BOUND
     ret["max_proposal_per_batch"] = cfg.MODEL.ROI_BOX_HEAD.MAX_PROPOSAL_PER_BATCH
     # Subclasses that have not been updated to use from_config style construction
     # may have overridden _init_*_head methods. In this case, those overridden methods
     # will not be classmethods and we need to avoid trying to call them here.
     # We test for this with ismethod which only returns True for bound methods of cls.
     # Such subclasses will need to handle calling their overridden _init_*_head methods.
     if inspect.ismethod(cls._init_box_head):
         ret.update(cls._init_box_head(cfg, input_shape))
     if inspect.ismethod(cls._init_mask_head):
         ret.update(cls._init_mask_head(cfg, input_shape))
     if inspect.ismethod(cls._init_keypoint_head):
         ret.update(cls._init_keypoint_head(cfg, input_shape))
     ret["proposal_matcher"] = Matcher(
         cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
         cfg.MODEL.ROI_HEADS.IOU_LABELS,
         allow_low_quality_matches=False,
     )
     return ret
Example #8
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super(ROIHeads, self).__init__()

        # fmt: off
        self.batch_size_per_image     = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE
        self.positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION
        self.test_score_thresh        = cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST
        self.test_nms_thresh          = cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST
        self.test_detections_per_img  = cfg.TEST.DETECTIONS_PER_IMAGE
        self.in_features              = cfg.MODEL.ROI_HEADS.IN_FEATURES
        self.num_classes              = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        self.proposal_append_gt       = cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT
        self.feature_strides          = {k: v.stride for k, v in input_shape.items()}
        self.feature_channels         = {k: v.channels for k, v in input_shape.items()}
        # fmt: on
        # filter class
        self.class_filter = []
        if 'CLASS_FILTER' in cfg.MODEL.ROI_HEADS:
            self.class_filter = cfg.MODEL.ROI_HEADS.CLASS_FILTER
        # Matcher to assign box proposals to gt boxes
        self.proposal_matcher = Matcher(
            cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
            cfg.MODEL.ROI_HEADS.IOU_LABELS,
            allow_low_quality_matches=False,
        )
Example #9
    def __init__(self, cfg):
        super().__init__()

        self.image_size = cfg.MODEL.SSD.IMAGE_SIZE
        self.num_classes = cfg.MODEL.SSD.NUM_CLASSES
        self.in_features = cfg.MODEL.SSD.IN_FEATURES
        self.extra_layer_arch = cfg.MODEL.SSD.EXTRA_LAYER_ARCH["SIZE{}".format(
            self.image_size)]
        self.l2norm_scale = cfg.MODEL.SSD.L2NORM_SCALE
        # Loss parameters:
        self.loss_alpha = cfg.MODEL.SSD.LOSS_ALPHA
        self.smooth_l1_loss_beta = cfg.MODEL.SSD.SMOOTH_L1_LOSS_BETA
        self.negative_positive_ratio = cfg.MODEL.SSD.NEGATIVE_POSITIVE_RATIO
        # Inference parameters:
        self.score_threshold = cfg.MODEL.SSD.SCORE_THRESH_TEST
        self.nms_threshold = cfg.MODEL.SSD.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # Vis parameters
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        # Build extra layers
        self.extra_layers = self._make_extra_layers(
            feature_shapes[-1].channels, self.extra_layer_arch)
        extra_layer_channels = [
            c for c in self.extra_layer_arch if isinstance(c, int)
        ]
        feature_shapes += [
            ShapeSpec(channels=c) for c in extra_layer_channels[1::2]
        ]

        # Head
        self.head = SSDHead(cfg, feature_shapes)
        self.l2norm = L2Norm(backbone_shape[self.in_features[0]].channels,
                             self.l2norm_scale)
        self.default_box_generator = DefaultBox(cfg)
        self.default_boxes = self.default_box_generator()

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.SSD.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.SSD.IOU_THRESHOLDS,
            cfg.MODEL.SSD.IOU_LABELS,
            allow_low_quality_matches=False,
        )

        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))

        # Initialization
        self._init_weights()
Example #10
def build_roi_mask_head(cfg):
    matcher = Matcher(
        cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
        cfg.MODEL.ROI_HEADS.IOU_LABELS,
        allow_low_quality_matches=False,
    )
    return ROIMaskHead(cfg, matcher, (cfg.MODEL.ROI_MASK_HEAD.RESOLUTION_H,
                                      cfg.MODEL.ROI_MASK_HEAD.RESOLUTION_W))
Example #11
    def __init__(self, cfg):
        super(RetinaNet, self).__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # Vis parameters
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        # fmt: on

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = RetinaNetHead(cfg, feature_shapes)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
        """
    In Detectron1, loss is normalized by number of foreground samples in the batch.
    When batch size is 1 per GPU, #foreground has a large variance and
    using it lead to lower performance. Here we maintain an EMA of #foreground to
    stabilize the normalizer.
    """
        self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
        self.loss_normalizer_momentum = 0.9
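The EMA described above is updated once per training step when the losses are computed; a minimal sketch, assuming num_foreground holds the current batch's foreground count:

momentum = self.loss_normalizer_momentum
self.loss_normalizer = momentum * self.loss_normalizer + (1 - momentum) * num_foreground
loss_cls = loss_cls / self.loss_normalizer  # divide by the smoothed #foreground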
Example #12
    def __init__(self, cfg) -> None:
        super().__init__()

        self.num_classes: int = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features: List[str] = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha: float = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma: float = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta: float = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold: float = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates: int = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold: float = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image: int = cfg.TEST.DETECTIONS_PER_IMAGE
        # Vis parameters
        self.vis_period: int = cfg.VIS_PERIOD
        self.input_format: str = cfg.INPUT.FORMAT

        self.fpn: FPN = build_fpn_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))

        backbone_fpn_output_shape: Dict[str,
                                        ShapeSpec] = self.fpn.output_shape()

        feature_shapes: List[ShapeSpec] = [
            backbone_fpn_output_shape[f] for f in self.in_features
        ]
        self.head: RetinaNetHead = RetinaNetHead(cfg, feature_shapes)

        self.anchor_generator: nn.Module = build_anchor_generator(
            cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform: Box2BoxTransform = Box2BoxTransform(
            weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)

        self.anchor_matcher: Matcher = Matcher(
            thresholds=cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            labels=cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True)

        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))

        # In Detectron1, loss is normalized by number of foreground samples in the batch.
        # When batch size is 1 per GPU, #foreground has a large variance and
        # using it leads to lower performance. Here we maintain an EMA of #foreground to
        # stabilize the normalizer.

        # Initialize with any reasonable #fg that's not too small
        self.loss_normalizer: float = 100
        self.loss_normalizer_momentum: float = 0.9
Example #13
    def __init__(self, cfg):
        super().__init__()

        self.num_classes = cfg.MODEL.RETINAFACE.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINAFACE.IN_FEATURES
        # loss parameters
        self.focal_loss_alpha = cfg.MODEL.RETINAFACE.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINAFACE.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINAFACE.SMOOTH_L1_LOSS_BETA
        self.loc_weight = cfg.MODEL.RETINAFACE.LOC_WEIGHT
        # inference parameters
        self.score_threshold = cfg.MODEL.RETINAFACE.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINAFACE.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINAFACE.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # visualize parameters
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        self.backbone = build_backbone(cfg)
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = RetinaFaceHead(cfg, feature_shapes)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.RETINAFACE.BBOX_REG_WEIGHTS
        )
        self.landmark2landmark_transform = Landmark2LandmarkTransform(
            weights=cfg.MODEL.RETINAFACE.LANDMARK_REG_WEIGHTS
        )
        self.matcher = Matcher(
            cfg.MODEL.RETINAFACE.IOU_THRESHOLDS,
            cfg.MODEL.RETINAFACE.IOU_LABELS,
            allow_low_quality_matches=True
        )
        self.register_buffer(
            "pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)
        )
        self.register_buffer(
            "pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)
        )

        """
        In Detectron1, loss is normalized by number of foreground samples in the 
        batch. When batch size is 1 per GPU, #foreground has a large variance and
        using it leads to lower performance. Here we maintain an EMA of #foreground
        to stabilize the normalizer.
        """
        # initialize with any reasonable #fg that's not too small
        self.loss_normalizer = 100
        self.loss_normalizer_momentum = 0.9
Example #14
    def __init__(self, cfg):
        super().__init__()
        # fmt: off
        self.device                   = torch.device(cfg.MODEL.DEVICE)
        self.num_classes              = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features              = cfg.MODEL.RETINANET.IN_FEATURES
        # Mask parameters:
        self.discard_mask_area        = cfg.MODEL.YOLACT.DISCARD_MASK_AREA
        self.num_masks                = cfg.MODEL.YOLACT.NUM_MASKS
        # Loss parameters:
        self.sem_seg_alpha            = cfg.MODEL.YOLACT.SEM_SEG_ALPHA
        self.mask_alpha               = cfg.MODEL.YOLACT.MASK_ALPHA
        self.mask_reweight            = cfg.MODEL.YOLACT.MASK_REWEIGHT
        self.maskiou_alpha            = cfg.MODEL.YOLACT.MASKIOU_ALPHA
        self.focal_loss_alpha         = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma         = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta      = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold          = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates          = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold            = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on

        # retinanet_resnet_fpn_backbone
        self.backbone = build_backbone(cfg)
        # dict[str->ShapeSpec]
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        # the base RetinaNet head with an added mask-coefficient branch
        self.head = YolactHead(cfg, feature_shapes)
        # which backbone output feeds the protonet; see the official YOLACT's cfg.proto_src.
        # the official default is `res2`, but here it is `res3`
        self.protonet = ProtoNet(cfg, feature_shapes[0])
        # for mask scoring
        self.maskiou_net = MaskIouNet(cfg)
        # semantic segmentation branch used to assist training
        self.semantic_seg_conv = nn.Conv2d(feature_shapes[0].channels, self.num_classes, 1)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #15
 def from_config(cls, cfg):
     return {
         "batch_size_per_image": cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE,
         "positive_fraction": cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION,
         "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES,
         "proposal_append_gt": cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT,
         # Matcher to assign box proposals to gt boxes
         "proposal_matcher": Matcher(
             cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
             cfg.MODEL.ROI_HEADS.IOU_LABELS,
             allow_low_quality_matches=False,
         ),
     }
Example #16
    def _init_box_head(cls, cfg, input_shape):
        # fmt: off
        in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features)
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
        cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS
        cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS
        assert len(cascade_bbox_reg_weights) == len(cascade_ious)
        assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG,  \
            "CascadeROIHeads only support class-agnostic regression now!"
        assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0]
        # fmt: on

        in_channels = [input_shape[f].channels for f in in_features]
        # Check all channel counts are equal
        assert len(set(in_channels)) == 1, in_channels
        in_channels = in_channels[0]

        box_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )
        pooled_shape = ShapeSpec(channels=in_channels,
                                 width=pooler_resolution,
                                 height=pooler_resolution)

        box_heads, box_predictors, proposal_matchers = [], [], []
        for match_iou, bbox_reg_weights in zip(cascade_ious,
                                               cascade_bbox_reg_weights):
            box_head = build_box_head(cfg, pooled_shape)
            box_heads.append(box_head)
            box_predictors.append(
                RotatedFastRCNNOutputLayers(
                    cfg,
                    box_head.output_shape,
                    box2box_transform=Box2BoxTransformRotated(
                        weights=bbox_reg_weights),
                ))
            proposal_matchers.append(
                Matcher([match_iou], [0, 1], allow_low_quality_matches=False))
        return {
            "box_in_features": in_features,
            "box_pooler": box_pooler,
            "box_heads": box_heads,
            "box_predictors": box_predictors,
            "proposal_matchers": proposal_matchers,
        }
Example #17
    def __init__(self, cfg):
        super(Yolov3, self).__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.num_classes = cfg.MODEL.YOLOV3.NUM_CLASSES
        self.norm = cfg.MODEL.YOLOV3.NORM
        self.in_features = cfg.MODEL.YOLOV3.IN_FEATURES
        self.anchors = cfg.MODEL.ANCHOR_GENERATOR.SIZES

        # Inference parameters:
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE

        self.backbone = build_backbone(cfg)

        self.head = Yolov3Head(in_features=self.in_features,
                               in_channels=[
                                   self.backbone._out_feature_channels[f]
                                   for f in self.in_features
                               ],
                               out_channels=cfg.MODEL.YOLOV3.HEAD.OUT_CHANNELS,
                               num_classes=self.num_classes,
                               num_anchors_per_cell=3,
                               norm=self.norm)
        backbone_shape = self.backbone.output_shape()
        self.feature_strides = [
            backbone_shape[f].stride for f in self.in_features
        ]
        self.feature_shapes = [backbone_shape[f] for f in self.in_features]

        self.anchor_generator = build_anchor_generator(cfg,
                                                       self.feature_shapes)
        self.grid_generator = build_grid_generator(cfg, self.feature_shapes)
        self.stride_generator = build_stride_generator(cfg,
                                                       self.feature_shapes)

        # self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        self.normalizer = lambda x: x / 255.0
        self.to(self.device)
        self.get_conv_bn_modules()

        self.bce_loss = nn.BCELoss()
        self.sigmoid = nn.Sigmoid()
Example #18
    def __init__(self, cfg):
        super().__init__()
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE

        self.num_points = cfg.MODEL.PROPOSAL_GENERATOR.NUM_POINTS

        self.backbone = build_backbone(cfg)
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = ReppointsRetinaNetHead(cfg, feature_shapes)
        grid = uniform_grid(2048)
        self.register_buffer("grid", grid)
        self.point_strides = [8, 16, 32, 64, 128]
        self.loss_normalizer = 20  # initialize with any reasonable #fg that's not too small
        self.loss_normalizer_momentum = 0.9
        self.strides = [backbone_shape[f].stride for f in self.in_features]

        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
        self.vis_period = 1024

        # Assigning init box labels.
        if cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE == 'points':
            from slender_det.modeling.matchers.rep_matcher import rep_points_match
            self.matcher = rep_points_match
        elif cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE == 'nearest_points':
            from slender_det.modeling.matchers.rep_matcher import nearest_point_match
            self.matcher = nearest_point_match
        else:
            assert cfg.MODEL.PROPOSAL_GENERATOR.SAMPLE_MODE == 'inside'
            from slender_det.modeling.matchers.rep_matcher import inside_match
            self.matcher = inside_match

        # Used for matching refine box labels.
        self.bbox_matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )
Example #19
def build_matcher(cfg):
    matcher_type = cfg.MODEL.RPN.MATCHER.TYPE  # avoid shadowing the builtin `type`
    assert matcher_type in MATCHER_TYPES, \
        "Matcher type doesn't exist! Expected one of {}, " \
        "but got {}".format(MATCHER_TYPES, matcher_type)
    if matcher_type == "Origin":
        return Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                       cfg.MODEL.RPN.IOU_LABELS,
                       allow_low_quality_matches=True)
    elif matcher_type == "TopK":
        return TopKMatcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                           cfg.MODEL.RPN.IOU_LABELS,
                           cfg.MODEL.RPN.MATCHER.TOPK)
    else:
        raise ValueError("Unknown Matcher type: {}".format(matcher_type))
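A quick usage sketch (assuming the config has been extended with the MODEL.RPN.MATCHER node read above):

cfg.MODEL.RPN.MATCHER.TYPE = "TopK"
cfg.MODEL.RPN.MATCHER.TOPK = 9
matcher = build_matcher(cfg)  # a TopKMatcher wired with the RPN IoU thresholds/labels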
Example #20
    def __init__(self, dataset_name, cfg, distributed, output_dir=None):
        """
        Args:
            dataset_name (str): name of the dataset to be evaluated.
                It must have either the following corresponding metadata:

                    "json_file": the path to the COCO format annotation

                Or it must be in detectron2's standard dataset format
                so it can be converted to COCO format automatically.
            cfg (CfgNode): config instance
            distributed (bool): if True, will collect results from all ranks for evaluation.
                Otherwise, will evaluate the results in the current process.
            output_dir (str): optional, an output directory to dump all
                results predicted on the dataset. The dump contains:

                "instances_results.json" a json file containing the evaluation results.
        """

        self._predictions = []
        self._fiber_results = []
        self._results = None

        # Matcher to assign predictions to annotations
        self._bbox_matcher = Matcher(
            cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
            cfg.MODEL.ROI_HEADS.IOU_LABELS,
            allow_low_quality_matches=False,
        )

        self._tasks = ("fiberwidth", "fiberlength")
        self._modes = ("strict", "loose")
        self._distributed = distributed
        self._output_dir = output_dir

        self._cpu_device = torch.device("cpu")
        self._logger = logging.getLogger(__name__)

        self._metadata = MetadataCatalog.get(dataset_name)
        assert hasattr(
            self._metadata, "json_file"
        ), f"json_file was not found in MetaDataCatalog for '{dataset_name}'"

        self._get_annotations()
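Typical construction, assuming this __init__ belongs to an evaluator class named FiberEvaluator (a hypothetical name) used like other detectron2 evaluators:

evaluator = FiberEvaluator("fiber_val", cfg, distributed=True, output_dir="./output/eval")  # hypothetical class and dataset names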
Example #21
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        self.head = RetinaNetHead(cfg, feature_shapes)

        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransformRotated(
            weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #22
    def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]):
        in_features = cfg.MODEL.RPN.IN_FEATURES
        ret = {
            "in_features": in_features,
            "min_box_size": cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE,
            "nms_thresh": cfg.MODEL.RPN.NMS_THRESH,
            "batch_size_per_image": cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE,
            "positive_fraction": cfg.MODEL.RPN.POSITIVE_FRACTION,
            "loss_weight": {
                "loss_rpn_cls": cfg.MODEL.RPN.LOSS_WEIGHT,
                "loss_rpn_loc": cfg.MODEL.RPN.BBOX_REG_LOSS_WEIGHT * cfg.MODEL.RPN.LOSS_WEIGHT,
            },
            "anchor_boundary_thresh": cfg.MODEL.RPN.BOUNDARY_THRESH,
            "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS),
            "box_reg_loss_type": cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE,
            "smooth_l1_beta": cfg.MODEL.RPN.SMOOTH_L1_BETA,
        }

        ret["pre_nms_topk"] = (cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN,
                               cfg.MODEL.RPN.PRE_NMS_TOPK_TEST)
        ret["post_nms_topk"] = (cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN,
                                cfg.MODEL.RPN.POST_NMS_TOPK_TEST)

        ret["anchor_generator"] = build_anchor_generator(
            cfg, [input_shape[f] for f in in_features])
        ret["anchor_matcher"] = Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                                        cfg.MODEL.RPN.IOU_LABELS,
                                        allow_low_quality_matches=True)
        ret["head"] = build_rpn_head(cfg,
                                     [input_shape[f] for f in in_features])
        return ret
Example #23
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super().__init__()

        # fmt: off
        self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
        self.in_features = cfg.MODEL.RPN.IN_FEATURES
        self.nms_thresh = cfg.MODEL.RPN.NMS_THRESH
        self.batch_size_per_image = cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE
        self.positive_fraction = cfg.MODEL.RPN.POSITIVE_FRACTION
        self.smooth_l1_beta = cfg.MODEL.RPN.SMOOTH_L1_BETA
        self.loss_weight = cfg.MODEL.RPN.LOSS_WEIGHT
        self.num_frames = cfg.MODEL.SPATIOTEMPORAL.NUM_FRAMES
        self.temporal_dropout = cfg.MODEL.SPATIOTEMPORAL.TEMPORAL_DROPOUT
        # fmt: on

        if self.temporal_dropout:
            assert cfg.MODEL.SPATIOTEMPORAL.FORWARD_AGGREGATION, "Temporal dropout without forward aggregation."

        if cfg.MODEL.SPATIOTEMPORAL.FORWARD_AGGREGATION:
            # (f_{t-NUM_FRAMES}, ..., f_{t-1}, f_t, f_{t+1}, ..., f_{t+NUM_FRAMES})
            self.num_frames = (2 * self.num_frames) + 1
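            # e.g. NUM_FRAMES = 2 yields (2 * 2) + 1 = 5 frames per window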

        # Map from self.training state to train/test settings
        self.pre_nms_topk = {
            True: cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN,
            False: cfg.MODEL.RPN.PRE_NMS_TOPK_TEST,
        }
        self.post_nms_topk = {
            True: cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN,
            False: cfg.MODEL.RPN.POST_NMS_TOPK_TEST,
        }
        self.boundary_threshold = cfg.MODEL.RPN.BOUNDARY_THRESH

        self.anchor_generator = build_anchor_generator(
            cfg, [input_shape[f] for f in self.in_features])
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.anchor_matcher = Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                                      cfg.MODEL.RPN.IOU_LABELS,
                                      allow_low_quality_matches=True)
        self.rpn_head = build_rpn_head(
            cfg, [input_shape[f] for f in self.in_features])
Example #24
 def from_config(cls, cfg):
     backbone = build_backbone(cfg)
     backbone_shape = backbone.output_shape()
     feature_shapes = [backbone_shape[f] for f in cfg.MODEL.RETINANET.IN_FEATURES]
     anchor_generator = build_anchor_generator(cfg, feature_shapes)
     metadata = MetadataCatalog.get(
         cfg.DATASETS.TRAIN[0] if len(cfg.DATASETS.TRAIN) else "__unused"
     )
     return {
         "backbone": backbone,
         "head": RetinaFaceHead(cfg, feature_shapes),
         "anchor_generator": anchor_generator,
         "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS),
         "mark2mark_transform": Mark2MarkTransform(cfg.MODEL.RETINAFACE.NUM_LANDMARK,
                                                   weights=cfg.MODEL.RETINAFACE.LANDMARK_REG_WEIGHTS),
         "anchor_matcher": Matcher(
             cfg.MODEL.RETINANET.IOU_THRESHOLDS,
             cfg.MODEL.RETINANET.IOU_LABELS,
             allow_low_quality_matches=True,
         ),
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
         "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES,
         "num_landmark": cfg.MODEL.RETINAFACE.NUM_LANDMARK,
         "head_in_features": cfg.MODEL.RETINANET.IN_FEATURES,
         # Loss parameters:
         "focal_loss_alpha": cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA,
         "focal_loss_gamma": cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA,
         "smooth_l1_beta": cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA,
         "box_reg_loss_type": cfg.MODEL.RETINANET.BBOX_REG_LOSS_TYPE,
         "loc_weight": cfg.MODEL.RETINAFACE.LOC_WEIGHT,
         # Inference parameters:
         "test_score_thresh": cfg.MODEL.RETINANET.SCORE_THRESH_TEST,
         "test_topk_candidates": cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST,
         "test_nms_thresh": cfg.MODEL.RETINANET.NMS_THRESH_TEST,
         "max_detections_per_image": cfg.TEST.DETECTIONS_PER_IMAGE,
         # Vis parameters
         "vis_period": cfg.VIS_PERIOD,
         "input_format": cfg.INPUT.FORMAT,
         "visualizer": TrainingVisualizer(detector_postprocess, metadata),
     }
Example #25
    def __init__(self, cfg):
        super().__init__()

        self.in_features = cfg.MODEL.FCOS.IN_FEATURES

        # Loss parameters:
        # defined by the `get_ground_truth` method
        self.num_points_per_level = None
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        self.center_sampling_radius = cfg.MODEL.FCOS.CENTER_SAMPLING_RADIUS
        self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS

        self.focal_loss_alpha = cfg.MODEL.FCOS.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.FCOS.FOCAL_LOSS_GAMMA
        self.iou_loss_type = cfg.MODEL.FCOS.IOU_LOSS_TYPE

        # Inference parameters:
        self.score_thresh = 0.3
        self.pre_nms_thresh = cfg.MODEL.FCOS.INFERENCE_TH
        self.pre_nms_top_n = cfg.MODEL.FCOS.PRE_NMS_TOP_N
        self.nms_thresh = cfg.MODEL.FCOS.NMS_TH
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        self.min_size = 0
        self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = FCOSRepPointsHead(cfg, feature_shapes)

        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))

        self.bbox_matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )
Example #26
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__(cfg, input_shape)
        head_params = cfg.MODEL.META_ARCH

        self.box_reg_loss_type = head_params.BBOX_REG_LOSS_TYPE
        self.anchor_generator = build_anchor_generator(cfg, input_shape)
        self.num_anchor = self.anchor_generator.num_cell_anchors[0]
        self.feat_adaptive = head_params.FEAT_ADAPTION

        # init bbox pred
        self.loc_init_conv = nn.Conv2d(self.feat_channels,
                                       self.loc_feat_channels, 3, 1, 1)
        self.loc_init_out = nn.Conv2d(self.loc_feat_channels, 4, 3, 1, 1)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=head_params.BBOX_REG_WEIGHTS)
        self.anchor_matcher = Matcher(
            head_params.IOU_THRESHOLDS,
            head_params.IOU_LABELS,
            allow_low_quality_matches=True,
        )
        self.strides = [i.stride for i in input_shape]
        self.matcher = nearest_point_match

        # make feature adaptive layer
        self.make_feature_adaptive_layers()

        self.cls_out = nn.Conv2d(self.feat_channels,
                                 self.num_anchor * self.num_classes, 3, 1, 1)
        self.loc_refine_out = nn.Conv2d(self.loc_feat_channels,
                                        self.num_anchor * 4, 3, 1, 1)

        self._init_weights()

        self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
        self.loss_normalizer_momentum = 0.9

        grid = uniform_grid(2048)
        self.register_buffer("grid", grid)
Example #27
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super(StandardHOROIHeads, self).__init__()
        # fmt: off
        self.in_features                  = cfg.MODEL.ROI_HEADS.IN_FEATURES
        self.proposal_append_gt           = cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT
        self.num_classes                  = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        self.num_actions                  = cfg.MODEL.ROI_HEADS.NUM_ACTIONS
        self.box_batch_size_per_image     = cfg.MODEL.ROI_HEADS.BOX_BATCH_SIZE_PER_IMAGE
        self.hoi_batch_size_per_image     = cfg.MODEL.ROI_HEADS.HOI_BATCH_SIZE_PER_IMAGE
        self.box_positive_sample_fraction = cfg.MODEL.ROI_HEADS.BOX_POSITIVE_FRACTION
        self.hoi_positive_sample_fraction = cfg.MODEL.ROI_HEADS.HOI_POSITIVE_FRACTION
        # fmt: on

        # Matcher to assign box proposals to gt boxes
        self.proposal_matcher = Matcher(
            cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
            cfg.MODEL.ROI_HEADS.IOU_LABELS,
            allow_low_quality_matches=False,
        )

        self._init_box_head(cfg, input_shape)
        self._init_hoi_head(cfg, input_shape)
Example #28
    def __init__(
            self,
            images,
            locations,
            logits_pred,
            reg_pred,
            ctrness_pred,
            mask_regression,
            mask_encoding,
            focal_loss_alpha,
            focal_loss_gamma,
            iou_loss,
            center_sample,
            sizes_of_interest,
            strides,
            radius,
            num_classes,
            pre_nms_thresh,
            pre_nms_top_n,
            nms_thresh,
            fpn_post_nms_top_n,
            thresh_with_ctr,
            gt_instances=None,
            cfg=None,
    ):
        self.cfg = cfg
        self.logits_pred = logits_pred
        self.reg_pred = reg_pred
        self.ctrness_pred = ctrness_pred
        self.locations = locations
        self.mask_regression = mask_regression
        self.mask_encoding = mask_encoding

        self.gt_instances = gt_instances
        self.num_feature_maps = len(logits_pred)
        self.num_images = len(images)
        self.image_sizes = images.image_sizes
        self.focal_loss_alpha = focal_loss_alpha
        self.focal_loss_gamma = focal_loss_gamma
        self.iou_loss = iou_loss
        self.center_sample = center_sample
        self.sizes_of_interest = sizes_of_interest
        self.strides = strides
        self.radius = radius
        self.num_classes = num_classes
        self.pre_nms_thresh = pre_nms_thresh
        self.pre_nms_top_n = pre_nms_top_n
        self.nms_thresh = nms_thresh
        self.fpn_post_nms_top_n = fpn_post_nms_top_n
        self.thresh_with_ctr = thresh_with_ctr

        self.loss_on_mask = cfg.MODEL.SMInst.LOSS_ON_MASK
        self.loss_on_code = cfg.MODEL.SMInst.LOSS_ON_CODE
        self.mask_loss_type = cfg.MODEL.SMInst.MASK_LOSS_TYPE
        self.num_codes = cfg.MODEL.SMInst.NUM_CODE
        self.mask_size = cfg.MODEL.SMInst.MASK_SIZE
        self.mask_sparse_weight = cfg.MODEL.SMInst.MASK_SPARSE_WEIGHT
        self.mask_loss_weight = cfg.MODEL.SMInst.MASK_LOSS_WEIGHT
        self.sparsity_loss_type = cfg.MODEL.SMInst.SPARSITY_LOSS_TYPE
        self.kl_rho = cfg.MODEL.SMInst.SPARSITY_KL_RHO

        # Matcher to assign box proposals to gt boxes
        self.proposal_matcher = Matcher(
            cfg.MODEL.SMInst.IOU_THRESHOLDS,
            cfg.MODEL.SMInst.IOU_LABELS,
            allow_low_quality_matches=False,
        )
Example #29
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        self.num_classes = cfg.MODEL.SparseRCNN.NUM_CLASSES
        self.num_proposals = cfg.MODEL.SparseRCNN.NUM_PROPOSALS
        self.hidden_dim = cfg.MODEL.SparseRCNN.HIDDEN_DIM
        self.num_heads = cfg.MODEL.SparseRCNN.NUM_HEADS

        # Build Backbone.
        self.backbone = build_backbone(cfg)
        self.size_divisibility = self.backbone.size_divisibility
        
        # Build Proposals.
        self.init_proposal_features = nn.Embedding(self.num_proposals, self.hidden_dim)
        self.init_proposal_boxes = nn.Embedding(self.num_proposals, 4)
        nn.init.constant_(self.init_proposal_boxes.weight[:, :2], 0.5)
        nn.init.constant_(self.init_proposal_boxes.weight[:, 2:], 1.0)
        
        # Build Dynamic Head.
        self.head = DynamicHead(cfg=cfg, roi_input_shape=self.backbone.output_shape())

        # TODO #2 mask head
        self.mask_pooler, self.mask_head = self._init_mask_head(cfg, input_shape=self.backbone.output_shape())
        self.proposal_append_gt = cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT
        self.proposal_matcher = Matcher(
                cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS,
                cfg.MODEL.ROI_HEADS.IOU_LABELS,
                allow_low_quality_matches=False,
            )

        # Loss parameters:
        class_weight = cfg.MODEL.SparseRCNN.CLASS_WEIGHT
        giou_weight = cfg.MODEL.SparseRCNN.GIOU_WEIGHT
        l1_weight = cfg.MODEL.SparseRCNN.L1_WEIGHT
        mask_weight = cfg.MODEL.SparseRCNN.MASK_WEIGHT
        no_object_weight = cfg.MODEL.SparseRCNN.NO_OBJECT_WEIGHT
        self.deep_supervision = cfg.MODEL.SparseRCNN.DEEP_SUPERVISION
        self.use_focal = cfg.MODEL.SparseRCNN.USE_FOCAL

        # Build Criterion.
        matcher = HungarianMatcher(cfg=cfg,
                                   cost_class=class_weight, 
                                   cost_bbox=l1_weight, 
                                   cost_giou=giou_weight,
                                   use_focal=self.use_focal)
        weight_dict = {"loss_ce": class_weight, "loss_bbox": l1_weight, "loss_giou": giou_weight, "loss_mask": mask_weight}
        if self.deep_supervision:
            aux_weight_dict = {}
            for i in range(self.num_heads - 1):
                aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()})
            weight_dict.update(aux_weight_dict)
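            # each intermediate head gets suffixed loss keys, e.g. 'loss_ce_0', 'loss_giou_3'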

        losses = ["labels", "boxes"]

        self.criterion = SetCriterion(cfg=cfg,
                                      num_classes=self.num_classes,
                                      matcher=matcher,
                                      weight_dict=weight_dict,
                                      eos_coef=no_object_weight,
                                      losses=losses,
                                      use_focal=self.use_focal)

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #30
 def f(thresholds: List[float], labels: List[int]):
     return Matcher(thresholds, labels, allow_low_quality_matches=True)