Example #1
def setup(file):
    # get cfg
    cfg = get_cfg()
    cfg.merge_from_file(file)
    cfg.SOLVER.IMS_PER_BATCH = 2
    device = torch.device(cfg.MODEL.DEVICE)  # device used by the backbone, images, and anchors below

    # get data loader iter
    data_loader = build_detection_train_loader(cfg)
    data_loader_iter = iter(data_loader)
    batched_inputs = next(data_loader_iter)

    # build anchors
    backbone = build_backbone(cfg).to(device)
    images = [x["image"].to(device) for x in batched_inputs]
    images = ImageList.from_tensors(images, backbone.size_divisibility)
    features = backbone(images.tensor.float())

    input_shape = backbone.output_shape()
    in_features = cfg.MODEL.RPN.IN_FEATURES
    anchor_generator = build_anchor_generator(
        cfg, [input_shape[f] for f in in_features])
    anchors = anchor_generator([features[f] for f in in_features])
    anchors = Boxes.cat(anchors).to(device)

    # build matcher
    raw_matcher = Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                          cfg.MODEL.RPN.IOU_LABELS,
                          allow_low_quality_matches=True)
    matcher = TopKMatcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                          cfg.MODEL.RPN.IOU_LABELS, 9)

    return cfg, data_loader_iter, anchors, matcher, raw_matcher
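
A minimal usage sketch for the objects returned by setup(), assuming the detectron2 API (pairwise_iou, Boxes) and that each dataset dict carries an "instances" field with gt_boxes; TopKMatcher is assumed to follow the same calling convention as Matcher, and the comparison itself is purely illustrative.

from detectron2.structures import pairwise_iou

def compare_matchers(data_loader_iter, anchors, matcher, raw_matcher):
    # draw one batch and label anchors against its ground-truth boxes
    batched_inputs = next(data_loader_iter)
    for per_image in batched_inputs:
        gt_boxes = per_image["instances"].gt_boxes.to(anchors.device)
        # IoU between every ground-truth box and every anchor: (num_gt, num_anchors)
        match_quality_matrix = pairwise_iou(gt_boxes, anchors)
        # each matcher returns a matched gt index and a label in {-1, 0, 1} per anchor
        topk_idxs, topk_labels = matcher(match_quality_matrix)
        raw_idxs, raw_labels = raw_matcher(match_quality_matrix)
        print("positive anchors (top-k / raw):",
              (topk_labels == 1).sum().item(),
              (raw_labels == 1).sum().item())
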
Example #2
    def __init__(self, cfg):
        super().__init__()

        self.in_features = cfg.MODEL.FCOS.IN_FEATURES

        # Loss parameters:
        # defined by the get_ground_truth() method
        self.num_points_per_level = None
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        self.center_sampling_radius = cfg.MODEL.FCOS.CENTER_SAMPLING_RADIUS
        self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS

        self.focal_loss_alpha = cfg.MODEL.FCOS.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.FCOS.FOCAL_LOSS_GAMMA
        self.iou_loss_type = cfg.MODEL.FCOS.IOU_LOSS_TYPE

        # Inference parameters:
        self.score_thresh = 0.3
        self.pre_nms_thresh = cfg.MODEL.FCOS.INFERENCE_TH
        self.pre_nms_top_n = cfg.MODEL.FCOS.PRE_NMS_TOP_N
        self.nms_thresh = cfg.MODEL.FCOS.NMS_TH
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        self.min_size = 0
        self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = FCOSAnchorHead(cfg, feature_shapes)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)
        self.num_anchors = self.anchor_generator.num_cell_anchors[0]

        self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()

        # Standard RPN is shared across levels:
        out_channels = cfg.MODEL.BUA.RPN.CONV_OUT_CHANNELS

        in_channels = [s.channels for s in input_shape]
        assert len(set(in_channels)) == 1, "Each level must have the same channel!"
        in_channels = in_channels[0]

        # RPNHead should take the same input as anchor generator
        # NOTE: it assumes that creating an anchor generator does not have unwanted side effect.
        anchor_generator = build_anchor_generator(cfg, input_shape)
        num_cell_anchors = anchor_generator.num_cell_anchors
        box_dim = anchor_generator.box_dim
        assert (
            len(set(num_cell_anchors)) == 1
        ), "Each level must have the same number of cell anchors"
        num_cell_anchors = num_cell_anchors[0]

        # 3x3 conv for the hidden representation
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        # 1x1 conv for predicting objectness logits
        self.objectness_logits = nn.Conv2d(out_channels, num_cell_anchors * 2, kernel_size=1, stride=1)
        # 1x1 conv for predicting box2box transform deltas
        self.anchor_deltas = nn.Conv2d(
            out_channels, num_cell_anchors * box_dim, kernel_size=1, stride=1
        )

        for l in [self.conv, self.objectness_logits, self.anchor_deltas]:
            nn.init.normal_(l.weight, std=0.01)
            nn.init.constant_(l.bias, 0)
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super().__init__()

        # fmt: off
        self.min_box_side_len        = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
        self.in_features             = cfg.MODEL.RPN.IN_FEATURES
        self.nms_thresh              = cfg.MODEL.RPN.NMS_THRESH
        self.batch_size_per_image    = cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE
        self.positive_fraction       = cfg.MODEL.RPN.POSITIVE_FRACTION
        self.smooth_l1_beta          = cfg.MODEL.RPN.SMOOTH_L1_BETA
        self.loss_weight             = cfg.MODEL.RPN.LOSS_WEIGHT
        # fmt: on

        # Map from self.training state to train/test settings
        self.pre_nms_topk = {
            True: cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN,
            False: cfg.MODEL.RPN.PRE_NMS_TOPK_TEST,
        }
        self.post_nms_topk = {
            True: cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN,
            False: cfg.MODEL.RPN.POST_NMS_TOPK_TEST,
        }
        self.boundary_threshold = cfg.MODEL.RPN.BOUNDARY_THRESH

        self.anchor_generator = build_anchor_generator(
            cfg, [input_shape[f] for f in self.in_features]
        )
        self.box2box_transform = BUABox2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.anchor_matcher = Matcher(
            cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True
        )
        self.rpn_head = build_rpn_head(cfg, [input_shape[f] for f in self.in_features])
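
The pre_nms_topk / post_nms_topk dictionaries above are keyed by the module's training flag and are meant to be indexed at runtime; a minimal sketch of that lookup (the forward pass itself is not part of this example):

        # inside the proposal generator's forward pass (illustrative)
        pre_nms_topk = self.pre_nms_topk[self.training]
        post_nms_topk = self.post_nms_topk[self.training]
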
Example #5
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()
        # fmt: off
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        num_convs = cfg.MODEL.RETINANET.NUM_CONVS
        prior_prob = cfg.MODEL.RETINANET.PRIOR_PROB
        num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
        # fmt: on
        assert (
            len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"
        num_anchors = num_anchors[0]

        self.ssh = []
        for i in range(len(cfg.MODEL.RETINANET.IN_FEATURES)):
            ssh = SSH(cfg, in_channels, in_channels)
            name = "ssh" + str(i)
            self.add_module(name, ssh)
            self.ssh.append(ssh)

        cls_subnet = []
        bbox_subnet = []
        for _ in range(num_convs):
            cls_subnet.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1))
            cls_subnet.append(nn.ReLU())
            bbox_subnet.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1))
            bbox_subnet.append(nn.ReLU())

        self.cls_score = nn.Conv2d(in_channels,
                                   num_anchors * num_classes,
                                   kernel_size=1,
                                   stride=1,
                                   padding=0)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   num_anchors * 4,
                                   kernel_size=1,
                                   stride=1,
                                   padding=0)

        # Initialization
        for modules in [self.cls_score, self.bbox_pred]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                    torch.nn.init.constant_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_score.bias, bias_value)
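
The bias_value used above initializes every classification logit so that the predicted foreground probability starts at prior_prob, which keeps the focal loss stable early in training; a short standalone check of that identity:

import math
import torch

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)
# sigmoid(bias_value) recovers prior_prob exactly
assert abs(torch.sigmoid(torch.tensor(bias_value)).item() - prior_prob) < 1e-6
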
Example #6
 def from_config(cls, cfg):
     backbone = build_backbone(cfg)
     backbone_shape = backbone.output_shape()
     backbone_level = cfg.MODEL.YOLOF.ENCODER.BACKBONE_LEVEL
     feature_shapes = [backbone_shape[backbone_level]]
     encoder = DilatedEncoder(cfg, backbone_shape)
     decoder = Decoder(cfg)
     anchor_generator = build_anchor_generator(cfg, feature_shapes)
     return {
         "backbone":
         backbone,
         "encoder":
         encoder,
         "decoder":
         decoder,
         "anchor_generator":
         anchor_generator,
         "box2box_transform":
         YOLOFBox2BoxTransform(
             weights=cfg.MODEL.YOLOF.BOX_TRANSFORM.BBOX_REG_WEIGHTS,
             add_ctr_clamp=cfg.MODEL.YOLOF.BOX_TRANSFORM.ADD_CTR_CLAMP,
             ctr_clamp=cfg.MODEL.YOLOF.BOX_TRANSFORM.CTR_CLAMP),
         "anchor_matcher":
         UniformMatcher(cfg.MODEL.YOLOF.MATCHER.TOPK),
         "pixel_mean":
         cfg.MODEL.PIXEL_MEAN,
         "pixel_std":
         cfg.MODEL.PIXEL_STD,
         "num_classes":
         cfg.MODEL.YOLOF.DECODER.NUM_CLASSES,
         "backbone_level":
         backbone_level,
         # Ignore thresholds:
         "pos_ignore_thresh":
         cfg.MODEL.YOLOF.POS_IGNORE_THRESHOLD,
         "neg_ignore_thresh":
         cfg.MODEL.YOLOF.NEG_IGNORE_THRESHOLD,
         # Loss parameters:
         "focal_loss_alpha":
         cfg.MODEL.YOLOF.LOSSES.FOCAL_LOSS_ALPHA,
         "focal_loss_gamma":
         cfg.MODEL.YOLOF.LOSSES.FOCAL_LOSS_GAMMA,
         "box_reg_loss_type":
         cfg.MODEL.YOLOF.LOSSES.BBOX_REG_LOSS_TYPE,
         # Inference parameters:
         "test_score_thresh":
         cfg.MODEL.YOLOF.SCORE_THRESH_TEST,
         "test_topk_candidates":
         cfg.MODEL.YOLOF.TOPK_CANDIDATES_TEST,
         "test_nms_thresh":
         cfg.MODEL.YOLOF.NMS_THRESH_TEST,
         "max_detections_per_image":
         cfg.MODEL.YOLOF.DETECTIONS_PER_IMAGE,
         # Vis parameters
         "vis_period":
         cfg.VIS_PERIOD,
         "input_format":
         cfg.INPUT.FORMAT,
     }
Example #7
    def __init__(self, cfg, feature_shapes, weights=[1.0, 1.0, 1.0, 1.0], scale_clamp=_DEFAULT_SCALE_CLAMP):
        super().__init__()
        self.weights = weights
        self.scale_clamp = scale_clamp
        
        # Build heads.
        num_classes = cfg.MODEL.OneNet.NUM_CLASSES
        d_model = cfg.MODEL.FPN.OUT_CHANNELS
        activation = cfg.MODEL.OneNet.ACTIVATION
        num_conv = cfg.MODEL.OneNet.NUM_CONV
        conv_norm = cfg.MODEL.OneNet.CONV_NORM
        num_levels = len(cfg.MODEL.OneNet.IN_FEATURES)
        conv_channels = cfg.MODEL.OneNet.CONV_CHANNELS

        self.num_classes = num_classes
        self.d_model = d_model
        self.activation = _get_activation_fn(activation)
        self.features_stride = cfg.MODEL.OneNet.FEATURES_STRIDE
        
        cls_conv_module = list()
        for idx in range(num_conv):
            if idx == 0:
                cls_conv_module.append(nn.Conv2d(d_model, conv_channels, kernel_size=3, stride=1, padding=1, bias=False))
            else:
                cls_conv_module.append(nn.Conv2d(conv_channels, conv_channels, kernel_size=3, stride=1, padding=1, bias=False))

            cls_conv_module.append(nn.ReLU(inplace=True))

        self.cls_conv_module = nn.ModuleList(cls_conv_module)

        reg_conv_module = list()
        for idx in range(num_conv):
            if idx == 0:
                reg_conv_module.append(nn.Conv2d(d_model, conv_channels, kernel_size=3, stride=1, padding=1, bias=False))
            else:
                reg_conv_module.append(nn.Conv2d(conv_channels, conv_channels, kernel_size=3, stride=1, padding=1, bias=False))

            reg_conv_module.append(nn.ReLU(inplace=True))

        self.reg_conv_module = nn.ModuleList(reg_conv_module)
        
        anchor_generator = build_anchor_generator(cfg, feature_shapes)
        self.anchor_generator = anchor_generator
        num_anchors = anchor_generator.num_cell_anchors
        assert (
            len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"
        self.num_anchors = num_anchors[0]
        
        self.cls_score = nn.Conv2d(conv_channels, self.num_anchors * num_classes, kernel_size=3, stride=1, padding=1)
        self.bbox_pred = nn.Conv2d(conv_channels, self.num_anchors * 4, kernel_size=3, stride=1, padding=1)

        # Init parameters.
        prior_prob = cfg.MODEL.OneNet.PRIOR_PROB
        self.bias_value = -math.log((1 - prior_prob) / prior_prob)
        self._reset_parameters()
Example #8
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()
        # fmt: off
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        num_convs   = cfg.MODEL.RETINANET.NUM_CONVS
        prior_prob  = cfg.MODEL.RETINANET.PRIOR_PROB
        norm        = cfg.MODEL.RETINANET.NORM
        # Disabling shared norm causes backwards compatibility issues
        # Hardcode to true for now
        # shared_norm = cfg.MODEL.RETINANET.SHARED_NORM

        num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
        # fmt: on
        assert (
            len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"
        num_anchors = num_anchors[0]

        if norm == "BN" or norm == "SyncBN":
            logger = logging.getLogger(__name__)
            logger.warning("Shared norm does not work well with BN or SyncBN; expect poor results")

        cls_subnet = []
        bbox_subnet = []
        for _ in range(num_convs):
            cls_subnet.append(
                nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            if norm:
                cls_subnet.append(get_norm(norm, in_channels))
            cls_subnet.append(nn.ReLU())
            bbox_subnet.append(
                nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            if norm:
                bbox_subnet.append(get_norm(norm, in_channels))
            bbox_subnet.append(nn.ReLU())

        self.cls_subnet = nn.Sequential(*cls_subnet)
        self.bbox_subnet = nn.Sequential(*bbox_subnet)
        self.cls_score = nn.Conv2d(
            in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1
        )
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1)

        # Initialization
        for modules in [self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                    torch.nn.init.constant_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -(math.log((1 - prior_prob) / prior_prob))
        torch.nn.init.constant_(self.cls_score.bias, bias_value)
Example #9
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()
        # fmt: off
        tower_repeat = [3, 3, 3, 4, 4, 4, 5, 5]
        in_channels = input_shape[0].channels
        compound_coef = cfg.MODEL.EFFICIENTNET.COMPOUND_COEFFICIENT
        num_classes = cfg.MODEL.EFFICIENTDET.NUM_CLASSES
        num_convs = cfg.MODEL.EFFICIENTDET.NUM_CONVS
        num_convs = tower_repeat[compound_coef] if num_convs < 0 else num_convs
        prior_prob = cfg.MODEL.EFFICIENTDET.PRIOR_PROB
        num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
        norm = cfg.MODEL.EFFICIENTDET.NORM
        export_onnx = cfg.MODEL.EXPORT_ONNX

        # fmt: on
        assert (
            len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"
        num_anchors = num_anchors[0]

        cls_subnet = []
        bbox_subnet = []
        for _ in range(num_convs):
            cls_subnet.append(
                SeparableConvBlock(in_channels,
                                   norm=norm,
                                   activation=True,
                                   onnx_export=export_onnx))
            bbox_subnet.append(
                SeparableConvBlock(in_channels,
                                   norm=norm,
                                   activation=True,
                                   onnx_export=export_onnx))

        self.cls_subnet = nn.Sequential(*cls_subnet)
        self.bbox_subnet = nn.Sequential(*bbox_subnet)
        self.cls_score = SeparableConvBlock(in_channels,
                                            num_anchors * num_classes)
        self.bbox_pred = SeparableConvBlock(in_channels, num_anchors * 4)

        # Initialization
        for modules in [
                self.cls_subnet, self.bbox_subnet, self.cls_score,
                self.bbox_pred
        ]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                    if layer.bias is not None:
                        torch.nn.init.constant_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -(math.log((1 - prior_prob) / prior_prob))
        torch.nn.init.constant_(self.cls_score.pointwise_conv.bias, bias_value)
Example #10
    def __init__(self, cfg) -> None:
        super().__init__()

        self.num_classes: int = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features: List[str] = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha: float = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma: float = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta: float = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold: float = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates: int = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold: float = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image: int = cfg.TEST.DETECTIONS_PER_IMAGE
        # Vis parameters
        self.vis_period: int = cfg.VIS_PERIOD
        self.input_format: str = cfg.INPUT.FORMAT

        self.fpn: FPN = build_fpn_backbone(
            cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))

        backbone_fpn_output_shape: Dict[str,
                                        ShapeSpec] = self.fpn.output_shape()

        feature_shapes: List[ShapeSpec] = [
            backbone_fpn_output_shape[f] for f in self.in_features
        ]
        self.head: RetinaNetHead = RetinaNetHead(cfg, feature_shapes)

        self.anchor_generator: nn.Module = build_anchor_generator(
            cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform: Box2BoxTransform = Box2BoxTransform(
            weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)

        self.anchor_matcher: Matcher = Matcher(
            thresholds=cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            labels=cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True)

        self.register_buffer("pixel_mean",
                             torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1))
        self.register_buffer("pixel_std",
                             torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1))

        # In Detectron1, loss is normalized by number of foreground samples in the batch.
        # When batch size is 1 per GPU, #foreground has a large variance and
        # using it leads to lower performance. Here we maintain an EMA of #foreground to
        # stabilize the normalizer.

        # Initialize with any reasonable #fg that's not too small
        self.loss_normalizer: float = 100
        self.loss_normalizer_momentum: float = 0.9
    def __init__(self, cfg):
        super(RetinaNet, self).__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # Vis parameters
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        # fmt: on

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = RetinaNetHead(cfg, feature_shapes)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD)
        num_channels = len(cfg.MODEL.PIXEL_MEAN)
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            num_channels, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            num_channels, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
        """
        In Detectron1, loss is normalized by number of foreground samples in the batch.
        When batch size is 1 per GPU, #foreground has a large variance and
        using it leads to lower performance. Here we maintain an EMA of #foreground to
        stabilize the normalizer.
        """
        self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
        self.loss_normalizer_momentum = 0.9
    def __init__(self, cfg):
        super().__init__()

        self.num_classes = cfg.MODEL.RETINAFACE.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINAFACE.IN_FEATURES
        # loss parameters
        self.focal_loss_alpha = cfg.MODEL.RETINAFACE.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINAFACE.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINAFACE.SMOOTH_L1_LOSS_BETA
        self.loc_weight = cfg.MODEL.RETINAFACE.LOC_WEIGHT
        # inference parameters
        self.score_threshold = cfg.MODEL.RETINAFACE.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINAFACE.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINAFACE.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # visualize parameters
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT

        self.backbone = build_backbone(cfg)
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        self.head = RetinaFaceHead(cfg, feature_shapes)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.RETINAFACE.BBOX_REG_WEIGHTS
        )
        self.landmark2landmark_transform = Landmark2LandmarkTransform(
            weights=cfg.MODEL.RETINAFACE.LANDMARK_REG_WEIGHTS
        )
        self.matcher = Matcher(
            cfg.MODEL.RETINAFACE.IOU_THRESHOLDS,
            cfg.MODEL.RETINAFACE.IOU_LABELS,
            allow_low_quality_matches=True
        )
        self.register_buffer(
            "pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)
        )
        self.register_buffer(
            "pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)
        )

        """
        In Detectron1, loss is normalized by number of foreground samples in the 
        batch. When batch size is 1 per GPU, #foreground has a large variance and
        using it leads to lower performance. Here we maintain an EMA of #foreground
        to stabilize the normalizer.
        """
        # initialize with any reasonable #fg that's not too small
        self.loss_normalizer = 100
        self.loss_normalizer_momentum = 0.9
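
The EMA described in the comment above is typically updated once per training step from the number of foreground (positive) anchors in the batch; a minimal sketch of that update, with num_foreground and loss_cls_sum standing in for values computed by the loss function (which is not shown in these examples):

        # illustrative EMA update inside the loss computation
        momentum = self.loss_normalizer_momentum
        self.loss_normalizer = momentum * self.loss_normalizer + (1 - momentum) * num_foreground
        loss_cls = loss_cls_sum / self.loss_normalizer
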
Example #13
    def __init__(self, cfg):
        super().__init__()
        # fmt: off
        self.device                   = torch.device(cfg.MODEL.DEVICE)
        self.num_classes              = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features              = cfg.MODEL.RETINANET.IN_FEATURES
        # Mask parameters:
        self.discard_mask_area        = cfg.MODEL.YOLACT.DISCARD_MASK_AREA
        self.num_masks                = cfg.MODEL.YOLACT.NUM_MASKS
        # Loss parameters:
        self.sem_seg_alpha            = cfg.MODEL.YOLACT.SEM_SEG_ALPHA
        self.mask_alpha               = cfg.MODEL.YOLACT.MASK_ALPHA
        self.mask_reweight            = cfg.MODEL.YOLACT.MASK_REWEIGHT
        self.maskiou_alpha            = cfg.MODEL.YOLACT.MASKIOU_ALPHA
        self.focal_loss_alpha         = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma         = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta      = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold          = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates          = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold            = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on

        # retinanet_resnet_fpn_backbone
        self.backbone = build_backbone(cfg)
        # dict[str->ShapeSpec]
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]
        # the base RetinaNet head with an added mask coefficient branch
        self.head = YolactHead(cfg, feature_shapes)
        # which backbone feature level feeds the protonet; see the official YOLACT's cfg.proto_src.
        # the official default is `res2`, but here it is `res3`
        self.protonet = ProtoNet(cfg, feature_shapes[0])
        # mask scoring (mask IoU) branch
        self.maskiou_net = MaskIouNet(cfg)
        # semantic segmentation branch that assists training
        self.semantic_seg_conv = nn.Conv2d(feature_shapes[0].channels, self.num_classes, 1)
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()
        # fmt: off
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.RETINAFACE.NUM_CLASSES
        prior_prob = cfg.MODEL.RETINAFACE.PRIOR_PROB
        num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
        # fmt: on
        assert (
            len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"
        num_anchors = num_anchors[0]

        # Add SSH Module
        self.ssh = SSH(cfg, input_shape)

        # Add heads
        cls_score = []
        bbox_pred = []
        # NOTE enable landmark
        landmark_pred = []
        for _ in range(len(input_shape)):
            cls_score.append(
                nn.Conv2d(in_channels,
                          num_anchors * num_classes,
                          kernel_size=1,
                          stride=1,
                          padding=0))
            bbox_pred.append(
                nn.Conv2d(in_channels,
                          num_anchors * 4,
                          kernel_size=1,
                          stride=1,
                          padding=0))
            landmark_pred.append(
                nn.Conv2d(in_channels,
                          num_anchors * 10,
                          kernel_size=1,
                          stride=1,
                          padding=0))

        self.cls_score = nn.ModuleList(cls_score)
        self.bbox_pred = nn.ModuleList(bbox_pred)
        self.landmark_pred = nn.ModuleList(landmark_pred)

        # NOTE Initialization

        # Use prior in model initialization to improve stability
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        for cls_score in self.cls_score:
            torch.nn.init.constant_(cls_score.bias, bias_value)
Example #15
    def from_config(cls, cfg, input_shape):
        # Standard RPN is shared across levels:
        in_channels = [s.channels for s in input_shape]
        assert len(set(in_channels)) == 1, "Each level must have the same channel!"
        in_channels = in_channels[0]

        # RPNHead should take the same input as anchor generator
        # NOTE: it assumes that creating an anchor generator does not have unwanted side effect.
        anchor_generator = build_anchor_generator(cfg, input_shape)
        num_anchors = anchor_generator.num_anchors
        box_dim = anchor_generator.box_dim
        assert (
            len(set(num_anchors)) == 1
        ), "Each level must have the same number of anchors per spatial position"
        return {"in_channels": in_channels, "num_anchors": num_anchors[0], "box_dim": box_dim}
Example #16
    def from_config(cls, cfg, input_shape: List[ShapeSpec]):
        num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
        assert (
            len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"
        num_anchors = num_anchors[0]

        return {
            "input_shape": input_shape,
            "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES,
            "conv_dims":
            [input_shape[0].channels] * cfg.MODEL.RETINANET.NUM_CONVS,
            "prior_prob": cfg.MODEL.RETINANET.PRIOR_PROB,
            "norm": cfg.MODEL.RETINANET.NORM,
            "num_anchors": num_anchors,
        }
Example #17
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()

        # Standard RPN is shared across levels:
        in_channels = [s.channels for s in input_shape]
        assert len(
            set(in_channels)) == 1, "Each level must have the same channel!"
        in_channels = in_channels[0]
        dwexpand_factor = cfg.MODEL.RPN.DWEXPAND_FACTOR
        norm = cfg.MODEL.RPN.NORM

        # RPNHead should take the same input as anchor generator
        # NOTE: it assumes that creating an anchor generator does not have unwanted side effect.
        anchor_generator = build_anchor_generator(cfg, input_shape)
        num_cell_anchors = anchor_generator.num_cell_anchors
        box_dim = anchor_generator.box_dim
        assert (len(set(num_cell_anchors)) == 1
                ), "Each level must have the same number of cell anchors"
        num_cell_anchors = num_cell_anchors[0]

        # 3x3 conv for the hidden representation
        expand_channels = dwexpand_factor * in_channels
        conv = []
        conv.append(Conv2d(in_channels, expand_channels, kernel_size=1, bias=not norm,\
                           norm=get_norm(norm, expand_channels), activation=F.relu))
        conv.append(Conv2d(expand_channels, expand_channels, kernel_size=5, padding=2, groups=expand_channels,\
                           bias=not norm, norm=get_norm(norm, expand_channels), activation=F.relu))
        self.add_module('conv', nn.Sequential(*conv))
        # in_channels = expand_channels
        # self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
        # 1x1 conv for predicting objectness logits
        self.objectness_logits = nn.Conv2d(in_channels,
                                           num_cell_anchors,
                                           kernel_size=1,
                                           stride=1)
        # 1x1 conv for predicting box2box transform deltas
        self.anchor_deltas = nn.Conv2d(in_channels,
                                       num_cell_anchors * box_dim,
                                       kernel_size=1,
                                       stride=1)

        for l in [*self.conv, self.objectness_logits, self.anchor_deltas]:
            nn.init.normal_(l.weight, std=0.01)
            if l.bias is not None:
                nn.init.constant_(l.bias, 0)
Example #18
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # fmt: on

        self.backbone = build_backbone(cfg)

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        self.head = RetinaNetHead(cfg, feature_shapes)

        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransformRotated(
            weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
Example #19
    def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]):
        in_features = cfg.MODEL.RPN.IN_FEATURES
        ret = {
            "in_features":
            in_features,
            "min_box_size":
            cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE,
            "nms_thresh":
            cfg.MODEL.RPN.NMS_THRESH,
            "batch_size_per_image":
            cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE,
            "positive_fraction":
            cfg.MODEL.RPN.POSITIVE_FRACTION,
            "loss_weight": {
                "loss_rpn_cls":
                cfg.MODEL.RPN.LOSS_WEIGHT,
                "loss_rpn_loc":
                cfg.MODEL.RPN.BBOX_REG_LOSS_WEIGHT * cfg.MODEL.RPN.LOSS_WEIGHT,
            },
            "anchor_boundary_thresh":
            cfg.MODEL.RPN.BOUNDARY_THRESH,
            "box2box_transform":
            Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS),
            "box_reg_loss_type":
            cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE,
            "smooth_l1_beta":
            cfg.MODEL.RPN.SMOOTH_L1_BETA,
        }

        ret["pre_nms_topk"] = (cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN,
                               cfg.MODEL.RPN.PRE_NMS_TOPK_TEST)
        ret["post_nms_topk"] = (cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN,
                                cfg.MODEL.RPN.POST_NMS_TOPK_TEST)

        ret["anchor_generator"] = build_anchor_generator(
            cfg, [input_shape[f] for f in in_features])
        ret["anchor_matcher"] = Matcher(cfg.MODEL.RPN.IOU_THRESHOLDS,
                                        cfg.MODEL.RPN.IOU_LABELS,
                                        allow_low_quality_matches=True)
        ret["head"] = build_rpn_head(cfg,
                                     [input_shape[f] for f in in_features])
        return ret
Example #20
 def from_config(cls, cfg):
     backbone = build_backbone(cfg)
     backbone_shape = backbone.output_shape()
     feature_shapes = [backbone_shape[f] for f in cfg.MODEL.RETINANET.IN_FEATURES]
     anchor_generator = build_anchor_generator(cfg, feature_shapes)
     metadata = MetadataCatalog.get(
         cfg.DATASETS.TRAIN[0] if len(cfg.DATASETS.TRAIN) else "__unused"
     )
     return {
         "backbone": backbone,
         "head": RetinaFaceHead(cfg, feature_shapes),
         "anchor_generator": anchor_generator,
         "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS),
         "mark2mark_transform": Mark2MarkTransform(cfg.MODEL.RETINAFACE.NUM_LANDMARK,
                                                   weights=cfg.MODEL.RETINAFACE.LANDMARK_REG_WEIGHTS),
         "anchor_matcher": Matcher(
             cfg.MODEL.RETINANET.IOU_THRESHOLDS,
             cfg.MODEL.RETINANET.IOU_LABELS,
             allow_low_quality_matches=True,
         ),
         "pixel_mean": cfg.MODEL.PIXEL_MEAN,
         "pixel_std": cfg.MODEL.PIXEL_STD,
         "num_classes": cfg.MODEL.RETINANET.NUM_CLASSES,
         "num_landmark": cfg.MODEL.RETINAFACE.NUM_LANDMARK,
         "head_in_features": cfg.MODEL.RETINANET.IN_FEATURES,
         # Loss parameters:
         "focal_loss_alpha": cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA,
         "focal_loss_gamma": cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA,
         "smooth_l1_beta": cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA,
         "box_reg_loss_type": cfg.MODEL.RETINANET.BBOX_REG_LOSS_TYPE,
         "loc_weight": cfg.MODEL.RETINAFACE.LOC_WEIGHT,
         # Inference parameters:
         "test_score_thresh": cfg.MODEL.RETINANET.SCORE_THRESH_TEST,
         "test_topk_candidates": cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST,
         "test_nms_thresh": cfg.MODEL.RETINANET.NMS_THRESH_TEST,
         "max_detections_per_image": cfg.TEST.DETECTIONS_PER_IMAGE,
         # Vis parameters
         "vis_period": cfg.VIS_PERIOD,
         "input_format": cfg.INPUT.FORMAT,
         "visualizer": TrainingVisualizer(detector_postprocess, metadata),
     }
Example #21
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__(cfg, input_shape)
        head_params = cfg.MODEL.META_ARCH

        self.box_reg_loss_type = head_params.BBOX_REG_LOSS_TYPE
        self.anchor_generator = build_anchor_generator(cfg, input_shape)
        self.num_anchor = self.anchor_generator.num_cell_anchors[0]
        self.feat_adaptive = head_params.FEAT_ADAPTION

        # init bbox pred
        self.loc_init_conv = nn.Conv2d(self.feat_channels,
                                       self.loc_feat_channels, 3, 1, 1)
        self.loc_init_out = nn.Conv2d(self.loc_feat_channels, 4, 3, 1, 1)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=head_params.BBOX_REG_WEIGHTS)
        self.anchor_matcher = Matcher(
            head_params.IOU_THRESHOLDS,
            head_params.IOU_LABELS,
            allow_low_quality_matches=True,
        )
        self.strides = [i.stride for i in input_shape]
        self.matcher = nearest_point_match

        # make feature adaptive layer
        self.make_feature_adaptive_layers()

        self.cls_out = nn.Conv2d(self.feat_channels,
                                 self.num_anchor * self.num_classes, 3, 1, 1)
        self.loc_refine_out = nn.Conv2d(self.loc_feat_channels,
                                        self.num_anchor * 4, 3, 1, 1)

        self._init_weights()

        self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
        self.loss_normalizer_momentum = 0.9

        grid = uniform_grid(2048)
        self.register_buffer("grid", grid)
Example #22
    def __init__(self, cfg, engine_path):
        super(TensorRTRetinaNet, self).__init__(engine_path)
        RetinaNetModel.__init__(self, cfg, self._engine)

        # preprocess parameters
        ns = types.SimpleNamespace()
        ns.training = False
        ns.input = self._cfg.INPUT
        ns.dynamic = self._cfg.INPUT.DYNAMIC
        ns.device = torch.device(self._cfg.MODEL.DEVICE)
        ns.pixel_mean = torch.tensor(self._cfg.MODEL.PIXEL_MEAN).view(
            -1, 1, 1).to(ns.device)
        ns.pixel_std = torch.tensor(self._cfg.MODEL.PIXEL_STD).view(
            -1, 1, 1).to(ns.device)

        ns.backbone = types.SimpleNamespace()
        ns.backbone.size_divisibility = 32

        # inference parameters
        ns.num_classes = self._cfg.MODEL.RETINANET.NUM_CLASSES
        ns.topk_candidates = self._cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        ns.score_threshold = self._cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        ns.nms_threshold = self._cfg.MODEL.RETINANET.NMS_THRESH_TEST
        ns.max_detections_per_image = self._cfg.TEST.DETECTIONS_PER_IMAGE

        # anchor generator
        feature_shapes = [ShapeSpec(stride=s) for s in (8, 16, 32, 64, 128)]
        self._anchor_generator = build_anchor_generator(
            self._cfg, feature_shapes)

        ns.preprocess_image = functools.partial(
            meta_arch.RetinaNet.preprocess_image, ns)
        ns.inference = functools.partial(meta_arch.RetinaNet.inference, ns)
        ns.inference_single_image = functools.partial(
            meta_arch.RetinaNet.inference_single_image, ns)
        ns.box2box_transform = Box2BoxTransform(
            weights=self._cfg.MODEL.RPN.BBOX_REG_WEIGHTS)

        self._ns = ns
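
The SimpleNamespace above stands in for a full RetinaNet instance so that the meta-architecture's unbound methods (preprocess_image, inference, ...) can be reused without building the PyTorch model; the trick is simply functools.partial supplying the namespace as self. A generic illustration of that pattern (Greeter and its attributes are made up for the example):

import functools
import types

class Greeter:
    def greet(self):
        # behaves like a normal method: reads attributes off `self`
        return f"hello, {self.name}"

ns = types.SimpleNamespace()
ns.name = "world"
# bind the unbound method to the namespace, which now plays the role of `self`
ns.greet = functools.partial(Greeter.greet, ns)
print(ns.greet())  # -> hello, world
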
Example #23
    def __init__(self, cfg):
        super().__init__()
        self.device = torch.device(cfg.MODEL.DEVICE)

        self.backbone_level = cfg.MODEL.YOLOF.ENCODER.BACKBONE_LEVEL
        self.backbone = build_backbone(cfg)
        self.nums_classes = cfg.MODEL.YOLOF.DECODER.NUM_CLASSES

        # build anchor generator
        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[self.backbone_level]]
        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # build encode decode
        self.encoder = DilatedEncoder(cfg, backbone_shape)
        self.decoder = Decoder(cfg)

        # prepare ground truth
        self.box2box_transform = YOLOFBox2BoxTransform(
            weights=cfg.MODEL.YOLOF.BOX_TRANSFORM.BBOX_REG_WEIGHTS,
            add_ctr_clamp=cfg.MODEL.YOLOF.BOX_TRANSFORM.ADD_CTR_CLAMP,
            ctr_clamp=cfg.MODEL.YOLOF.BOX_TRANSFORM.CTR_CLAMP)
        self.anchor_matcher = UniformMatcher(cfg.MODEL.YOLOF.MATCHER.TOPK)
        self.test_score_thresh = 0.05
        self.test_nms_thresh = 0.6
        self.test_topk_candidates = 1000
        self.max_detections_per_image = 100

        # build loss
        self.losses = Losses(cfg)

        # get normalizer
        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()
        # fmt: off
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        prior_prob = cfg.MODEL.RETINANET.PRIOR_PROB
        num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors

        norm = cfg.MODEL.RETINANET.NORM
        num_convs = cfg.MODEL.RETINANET.NUM_CONVS
        in_features = cfg.MODEL.RETINANET.IN_FEATURES
        # fmt: on

        assert (len(set(num_anchors)) == 1), \
          "Using different number of anchors between levels is not currently supported!"
        num_anchors = num_anchors[0]

        cls_depthwise_convs = []
        cls_pointwise_convs = []
        bbox_depthwise_convs = []
        bbox_pointwise_convs = []
        for _ in range(num_convs):
            cls_depthwise_convs.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          groups=in_channels,
                          bias=False))
            cls_pointwise_convs.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=1,
                          stride=1,
                          padding=0,
                          bias=norm == ''))
            bbox_depthwise_convs.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          groups=in_channels,
                          bias=False))
            bbox_pointwise_convs.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=1,
                          stride=1,
                          padding=0,
                          bias=norm == ''))

        self.cls_subnets = nn.ModuleList()
        self.bbox_subnets = nn.ModuleList()
        for _ in in_features:
            cls_subnet = []
            bbox_subnet = []
            for cls_depthwise, cls_pointwise, bbox_depthwise, bbox_pointwise in \
                zip(cls_depthwise_convs, cls_pointwise_convs, bbox_depthwise_convs, bbox_pointwise_convs):
                cls_subnet.append(
                    ResHead(cls_depthwise, cls_pointwise, in_channels, norm))
                bbox_subnet.append(
                    ResHead(bbox_depthwise, bbox_pointwise, in_channels, norm))
            self.cls_subnets.append(nn.Sequential(*cls_subnet))
            self.bbox_subnets.append(nn.Sequential(*bbox_subnet))

        self.cls_score = nn.Conv2d(in_channels,
                                   num_anchors * num_classes,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   num_anchors * 4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)

        # Initialization
        for modules in [
                self.cls_subnets, self.bbox_subnets, self.cls_score,
                self.bbox_pred
        ]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                    if layer.bias is not None:
                        torch.nn.init.constant_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_score.bias, bias_value)
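
The depthwise (3x3, groups=in_channels) plus pointwise (1x1) pairs above form depthwise-separable convolutions, which use far fewer parameters than a dense 3x3 convolution; a quick standalone comparison (the channel count is arbitrary):

import torch.nn as nn

c = 256
dense = nn.Conv2d(c, c, kernel_size=3, padding=1, bias=False)
depthwise = nn.Conv2d(c, c, kernel_size=3, padding=1, groups=c, bias=False)
pointwise = nn.Conv2d(c, c, kernel_size=1, bias=False)

count = lambda m: sum(p.numel() for p in m.parameters())
print(count(dense))                          # 589824 = c * c * 3 * 3
print(count(depthwise) + count(pointwise))   # 67840  = c * 3 * 3 + c * c
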
Example #25
    def __init__(self, cfg):
        super().__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # fmt: off
        self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        self.in_features = cfg.MODEL.RETINANET.IN_FEATURES
        # Loss parameters:
        self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA
        self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA
        self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA
        # Inference parameters:
        self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST
        self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST
        self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST
        self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        # Vis parameters
        self.vis_period = cfg.VIS_PERIOD
        self.input_format = cfg.INPUT.FORMAT
        # fmt: on

        # for onnx model export
        self.export_onnx = cfg.MODEL.FASHIONNET.EXPORT_ONNX

        # for classification task
        self.classification_tasks = cfg.MODEL.FASHIONNET.CLASSIFICATION_HEAD.TASK_NAMES
        self.classification_classes = cfg.MODEL.FASHIONNET.CLASSIFICATION_HEAD.NUM_CLASSES
        assert (len(self.classification_classes) == len(
            self.classification_tasks))
        self.activation = cfg.MODEL.FASHIONNET.CLASSIFICATION_HEAD.ACTIVATION
        self.fashion_score_threshold = cfg.MODEL.FASHIONNET.CLASSIFICATION_HEAD.SCORE_THRESH

        self.backbone = build_backbone(cfg)
        self.size_divisibility = 32

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        self.head = RetinaNetHead(cfg, feature_shapes)
        self.cls_head = FashionClassificationHead(cfg, feature_shapes)

        # # multi task learning with uncertainty
        # self.log_vars = nn.Parameter(torch.zeros(2), requires_grad=True)

        self.anchor_generator = build_anchor_generator(cfg, feature_shapes)

        # Matching and loss
        self.box2box_transform = Box2BoxTransform(
            weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        self.matcher = Matcher(
            cfg.MODEL.RETINANET.IOU_THRESHOLDS,
            cfg.MODEL.RETINANET.IOU_LABELS,
            allow_low_quality_matches=True,
        )

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std
        self.to(self.device)
        """
        In Detectron1, loss is normalized by number of foreground samples in the batch.
        When batch size is 1 per GPU, #foreground has a large variance and
        using it leads to lower performance. Here we maintain an EMA of #foreground to
        stabilize the normalizer.
        """
        self.loss_normalizer = 100  # initialize with any reasonable #fg that's not too small
        self.loss_normalizer_momentum = 0.9
Example #26
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()
        # the same as RetinaNetHead, except that the cls_score net is replaced by a logits net built on deform_conv
        # fmt: off
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        num_convs = cfg.MODEL.RETINANET.NUM_CONVS
        prior_prob = cfg.MODEL.RETINANET.PRIOR_PROB
        num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
        # fmt: on
        assert (
                len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"
        num_anchors = num_anchors[0]

        cls_subnet = []
        bbox_subnet = []
        for _ in range(num_convs):
            cls_subnet.append(
                nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            cls_subnet.append(nn.ReLU())
            bbox_subnet.append(
                nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            )
            bbox_subnet.append(nn.ReLU())

        self.cls_subnet = nn.Sequential(*cls_subnet)
        self.bbox_subnet = nn.Sequential(*bbox_subnet)
        #        self.cls_score = nn.Conv2d(
        #            in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1
        #        )
        self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1)

        # Initialization
        for modules in [self.cls_subnet, self.bbox_subnet, self.bbox_pred]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                    torch.nn.init.constant_(layer.bias, 0)

        # Deform_conv block, added as a second-stage refinement. The implementation follows RepPoints.
        self.dcn_kernel = 3
        self.dcn_pad = 1
        self.point_base_scale = 4
        self.gradient_mul = 0.1
        self.in_channels = in_channels
        self.num_anchors = num_anchors
        self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        dcn_base = np.arange(-self.dcn_pad,
                             self.dcn_pad + 1).astype(np.float64)
        dcn_base_y = np.repeat(dcn_base, self.dcn_kernel)
        dcn_base_x = np.tile(dcn_base, self.dcn_kernel)
        dcn_base_offset = np.stack([dcn_base_y, dcn_base_x], axis=1).reshape((-1))
        dcn_base_offset = torch.tensor(dcn_base_offset, dtype=torch.float32).view(1, -1, 1, 1)
        self.register_buffer("dcn_base_offset", dcn_base_offset)

        self.deform_cls_conv = DeformConv(
            self.in_channels,
            self.in_channels,
            self.dcn_kernel, 1, self.dcn_pad)
        self.deform_reg_conv = DeformConv(
            self.in_channels,
            self.in_channels,
            self.dcn_kernel, 1, self.dcn_pad)
        self.offsets_refine = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(self.num_anchors * self.in_channels,
                      num_anchors * 4,
                      1, 1, 0))
        self.logits = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(self.num_anchors * self.in_channels,
                      num_anchors * num_classes,
                      1, 1, 0))

        bias_init = float(-np.log((1 - 0.01) / 0.01))
        for modules in [
            self.offsets_refine,
            self.deform_cls_conv,
            self.deform_reg_conv]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                    torch.nn.init.constant_(layer.bias, 0)

        for module in self.logits.modules():
            if hasattr(module, 'bias') and module.bias is not None:
                torch.nn.init.constant_(module.bias, bias_init)
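
The dcn_base_offset buffer registered above is just the flattened (y, x) sampling grid of a standard 3x3 kernel, which the predicted deformable-conv offsets are expressed relative to; a standalone reproduction of that construction for dcn_kernel=3, dcn_pad=1:

import numpy as np

dcn_kernel, dcn_pad = 3, 1
dcn_base = np.arange(-dcn_pad, dcn_pad + 1).astype(np.float64)   # [-1, 0, 1]
dcn_base_y = np.repeat(dcn_base, dcn_kernel)                     # [-1 -1 -1  0  0  0  1  1  1]
dcn_base_x = np.tile(dcn_base, dcn_kernel)                       # [-1  0  1 -1  0  1 -1  0  1]
dcn_base_offset = np.stack([dcn_base_y, dcn_base_x], axis=1).reshape(-1)
print(dcn_base_offset)
# [-1. -1. -1.  0. -1.  1.  0. -1.  0.  0.  0.  1.  1. -1.  1.  0.  1.  1.]
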
Example #27
    def __init__(self,
                 cfg,
                 use_dropout,
                 dropout_rate,
                 compute_cls_var,
                 compute_bbox_cov,
                 bbox_cov_dims,
                 input_shape: List[ShapeSpec]):
        super().__init__(cfg, input_shape)

        # Extract config information
        # fmt: off
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        num_convs = cfg.MODEL.RETINANET.NUM_CONVS
        prior_prob = cfg.MODEL.RETINANET.PRIOR_PROB
        num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
        # fmt: on
        assert (
            len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"
        num_anchors = num_anchors[0]

        self.compute_cls_var = compute_cls_var
        self.compute_bbox_cov = compute_bbox_cov
        self.bbox_cov_dims = bbox_cov_dims

        # For consistency all configs are grabbed from original RetinaNet
        self.use_dropout = use_dropout
        self.dropout_rate = dropout_rate

        cls_subnet = []
        bbox_subnet = []
        for _ in range(num_convs):
            cls_subnet.append(
                nn.Conv2d(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1))
            cls_subnet.append(nn.ReLU())

            bbox_subnet.append(
                nn.Conv2d(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1))
            bbox_subnet.append(nn.ReLU())

            if self.use_dropout:
                cls_subnet.append(nn.Dropout(p=self.dropout_rate))
                bbox_subnet.append(nn.Dropout(p=self.dropout_rate))

        self.cls_subnet = nn.Sequential(*cls_subnet)
        self.bbox_subnet = nn.Sequential(*bbox_subnet)

        self.cls_score = nn.Conv2d(
            in_channels,
            num_anchors * num_classes,
            kernel_size=3,
            stride=1,
            padding=1)
        self.bbox_pred = nn.Conv2d(
            in_channels,
            num_anchors * 4,
            kernel_size=3,
            stride=1,
            padding=1)

        for modules in [
                self.cls_subnet,
                self.bbox_subnet,
                self.cls_score,
                self.bbox_pred]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                    torch.nn.init.constant_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_score.bias, bias_value)

        # Create subnet for classification variance estimation.
        if self.compute_cls_var:
            self.cls_var = nn.Conv2d(
                in_channels,
                num_anchors * num_classes,
                kernel_size=3,
                stride=1,
                padding=1)

            for layer in self.cls_var.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                    torch.nn.init.constant_(layer.bias, -10.0)

        # Create subnet for bounding box covariance estimation.
        if self.compute_bbox_cov:
            self.bbox_cov = nn.Conv2d(
                in_channels,
                num_anchors * self.bbox_cov_dims,
                kernel_size=3,
                stride=1,
                padding=1)

            for layer in self.bbox_cov.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.0001)
                    torch.nn.init.constant_(layer.bias, 0)
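
As a quick check of the prior-probability trick used above (and in the other RetinaNet-style heads in this section): with prior_prob = 0.01 the bias works out to about -4.6, which puts the initial sigmoid output of every classification logit at roughly 0.01. A minimal sketch:

# Minimal check of the prior-probability bias initialization (prior_prob = 0.01).
import math
import torch

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)
print(round(bias_value, 3))                     # -4.595
print(torch.sigmoid(torch.tensor(bias_value)))  # tensor(0.0100)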
Example #28
    def __init__(self, cfg: CfgNode, input_shape: List[ShapeSpec]) -> None:
        super().__init__()
        in_channels = input_shape[0].channels
        num_classes: int = cfg.MODEL.RETINANET.NUM_CLASSES
        num_convs: int = cfg.MODEL.RETINANET.NUM_CONVS
        prior_prob: float = cfg.MODEL.RETINANET.PRIOR_PROB
        num_anchors: List[int] = build_anchor_generator(
            cfg, input_shape).num_cell_anchors

        assert len(set(num_anchors)) == 1,\
            "Using different number of anchors between levels is not currently supported!"

        num_anchors_int: int = num_anchors[0]

        cls_subnet: List[nn.Module] = []
        bbox_subnet: List[nn.Module] = []
        for _ in range(num_convs):
            cls_subnet.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1))
            cls_subnet.append(nn.ReLU())
            bbox_subnet.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1))
            bbox_subnet.append(nn.ReLU())

        self.cls_subnet = nn.Sequential(*cls_subnet)
        self.bbox_subnet = nn.Sequential(*bbox_subnet)
        self.cls_score = nn.Conv2d(in_channels,
                                   num_anchors_int * num_classes,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)

        self.bbox_pred = nn.Conv2d(in_channels,
                                   num_anchors_int * 4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)

        # Initialization
        for modules in [
                self.cls_subnet, self.bbox_subnet, self.cls_score,
                self.bbox_pred
        ]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(tensor=layer.weight,
                                          mean=0,
                                          std=0.01)
                    torch.nn.init.constant_(tensor=layer.bias, val=0)

        # Use prior in model initialization to improve stability
        bias_value: float = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_score.bias, bias_value)
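
The anchor-count check above can be exercised on its own with the default detectron2 config; the feature shapes below are illustrative assumptions (typical FPN channel and stride values), not values taken from the original example:

# Standalone sketch of the anchor-count check above, using the default
# detectron2 config; the channel/stride values are illustrative assumptions.
from detectron2.config import get_cfg
from detectron2.layers import ShapeSpec
from detectron2.modeling import build_anchor_generator

cfg = get_cfg()
feature_shapes = [ShapeSpec(channels=256, stride=s) for s in (8, 16, 32, 64, 128)]
num_anchors = build_anchor_generator(cfg, feature_shapes).num_cell_anchors
assert len(set(num_anchors)) == 1
print(num_anchors[0])  # anchors per spatial location, identical on every level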
Example #29
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__()

        # fmt: off
        num_conv = cfg.MODEL.HORPN.NUM_CONV
        conv_dim = cfg.MODEL.HORPN.CONV_DIM
        num_rn_fc = cfg.MODEL.HORPN.NUM_RN_FC
        rn_fc_dim = cfg.MODEL.HORPN.RN_FC_DIM
        self.topk = cfg.MODEL.HORPN.TOPK_PERSON_CELLS
        # fmt: on

        anchor_generator = build_anchor_generator(cfg, input_shape)
        box_dim = self.box_dim = anchor_generator.box_dim

        # standard HORPN head is shared across levels:
        in_channels = [s.channels for s in input_shape]
        assert len(set(in_channels)) == 1, "Each level must have the same channel!"
        in_channels = in_channels[0]

        # HORPN head should take the same input as anchor generator
        num_cell_anchors = anchor_generator.num_cell_anchors
        assert (
            len(set(num_cell_anchors)) == 1
        ), "Each level must have the same number of anchors"
        num_cell_anchors = self.num_cell_anchors = num_cell_anchors[0]

        # 3x3 conv for the person hidden features
        _p_out_dim = in_channels
        self.person_convs = []
        for k in range(num_conv):
            conv = nn.Conv2d(_p_out_dim,
                             conv_dim,
                             kernel_size=3,
                             stride=1,
                             padding=1)
            self.add_module("person_conv{}".format(k + 1), conv)
            self.person_convs.append(conv)
            _p_out_dim = conv_dim

        # 3x3 conv for the object hidden features
        _o_out_dim = in_channels
        self.object_convs = []
        for k in range(num_conv):
            conv = nn.Conv2d(_o_out_dim,
                             conv_dim,
                             kernel_size=3,
                             stride=1,
                             padding=1)
            self.add_module("object_conv{}".format(k + 1), conv)
            self.object_convs.append(conv)
            _o_out_dim = conv_dim

        # Relational networks for interactness logits prediction
        _out_dim = _o_out_dim + _p_out_dim
        self.rn_fcs = []
        for k in range(num_rn_fc):
            fc = nn.Linear(_out_dim, rn_fc_dim)
            self.add_module("rn_fc{}".format(k + 1), fc)
            self.rn_fcs.append(fc)
            _out_dim = rn_fc_dim

        # Proposal predictor
        self.person_logits = nn.Conv2d(_p_out_dim,
                                       num_cell_anchors,
                                       kernel_size=1)
        self.person_deltas = nn.Conv2d(_p_out_dim,
                                       num_cell_anchors * box_dim,
                                       kernel_size=1)
        self.object_logits = nn.Linear(_out_dim, num_cell_anchors)
        self.object_deltas = nn.Conv2d(_o_out_dim,
                                       num_cell_anchors * box_dim,
                                       kernel_size=1)

        # Weights initialization
        for layer in self.person_convs:
            weight_init.c2_msra_fill(layer)
        for layer in self.object_convs:
            weight_init.c2_msra_fill(layer)
        for layer in self.rn_fcs:
            weight_init.c2_xavier_fill(layer)
        for layer in [self.person_logits, self.person_deltas]:
            nn.init.normal_(layer.weight, std=0.01)
            nn.init.constant_(layer.bias, 0)
        for layer in [self.object_logits, self.object_deltas]:
            nn.init.normal_(layer.weight, std=0.01)
            nn.init.constant_(layer.bias, 0)
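
To make the predictor shapes above concrete: per spatial location, person_logits emits one interactness score per cell anchor and person_deltas emits box_dim regression values per anchor, while the relational FCs operate on the concatenation of person and object features (hence _out_dim = _p_out_dim + _o_out_dim). A small sketch with illustrative sizes (256 channels, 3 cell anchors, box_dim 4 are assumptions, not values from the original example):

# Illustrative shape check (values assumed: 256 channels, 3 cell anchors, box_dim 4).
import torch
import torch.nn as nn

conv_dim, num_cell_anchors, box_dim = 256, 3, 4
person_logits = nn.Conv2d(conv_dim, num_cell_anchors, kernel_size=1)
person_deltas = nn.Conv2d(conv_dim, num_cell_anchors * box_dim, kernel_size=1)

feat = torch.randn(1, conv_dim, 32, 32)   # one FPN level
print(person_logits(feat).shape)          # torch.Size([1, 3, 32, 32])
print(person_deltas(feat).shape)          # torch.Size([1, 12, 32, 32])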
Example #30
    def __init__(self, cfg, input_shape: List[ShapeSpec]):
        super().__init__(cfg, input_shape)
        # fmt: off
        in_channels = input_shape[0].channels
        num_classes = cfg.MODEL.RETINANET.NUM_CLASSES
        num_convs = cfg.MODEL.RETINANET.NUM_CONVS
        prior_prob = cfg.MODEL.RETINANET.PRIOR_PROB
        num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
        # fmt: on
        assert (
            len(set(num_anchors)) == 1
        ), "Using different number of anchors between levels is not currently supported!"
        num_anchors = num_anchors[0]

        cls_subnet = []
        bbox_subnet = []
        for i in range(num_convs):
            # classification subnet: 3x3 conv + ReLU
            cls_subnet.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1))
            cls_subnet.append(nn.ReLU())
            # insert spatial dropout after the middle conv block
            if i == int(round(num_convs / 2)):
                cls_subnet.append(nn.Dropout2d(0.5))
            # box regression subnet: same structure as the classification subnet
            bbox_subnet.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1))
            bbox_subnet.append(nn.ReLU())
            if i == int(round(num_convs / 2)):
                bbox_subnet.append(nn.Dropout2d(0.5))

        self.cls_subnet = nn.Sequential(*cls_subnet)
        self.bbox_subnet = nn.Sequential(*bbox_subnet)
        self.cls_score = nn.Conv2d(in_channels,
                                   num_anchors * num_classes,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   num_anchors * 4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)

        # Initialization
        for modules in [
                self.cls_subnet, self.bbox_subnet, self.cls_score,
                self.bbox_pred
        ]:
            for layer in modules.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                    torch.nn.init.constant_(layer.bias, 0)

        # Use prior in model initialization to improve stability
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_score.bias, bias_value)
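
For the dropout placement above: with NUM_CONVS = 4 (assumed here; it is the usual RetinaNet setting), int(round(num_convs / 2)) is 2, so Dropout2d lands after the third conv/ReLU pair in each subnet. A standalone sketch of the resulting layout:

# Sketch of the subnet layout produced by the loop above for num_convs = 4
# (assumed for illustration) and an arbitrary channel count.
import torch.nn as nn

num_convs, in_channels = 4, 256
layers = []
for i in range(num_convs):
    layers.append(nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1))
    layers.append(nn.ReLU())
    if i == int(round(num_convs / 2)):
        layers.append(nn.Dropout2d(0.5))

print([type(m).__name__ for m in layers])
# ['Conv2d', 'ReLU', 'Conv2d', 'ReLU', 'Conv2d', 'ReLU', 'Dropout2d', 'Conv2d', 'ReLU']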