예제 #1
0
    def __init__(self, config):
        """Create the ROI pooler and the ResNet stage-4 head from ``config``.

        Arguments:
            config: configuration node; this constructor reads
                ``MODEL.ROI_BOX_HEAD``, ``MODEL.RESNETS`` and ``MODEL.DECONV``.
        """
        super(ResNet50Conv5ROIFeatureExtractor, self).__init__()

        box_cfg = config.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=box_cfg.POOLER_SCALES,
            sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
        )

        resnet_cfg = config.MODEL.RESNETS
        conv5_stage = resnet.StageSpec(
            index=4, block_count=3, return_features=False)
        conv5_head = resnet.ResNetHead(
            block_module=resnet_cfg.TRANS_FUNC,
            stages=(conv5_stage, ),
            num_groups=resnet_cfg.NUM_GROUPS,
            width_per_group=resnet_cfg.WIDTH_PER_GROUP,
            stride_in_1x1=resnet_cfg.STRIDE_IN_1X1,
            stride_init=None,
            res2_out_channels=resnet_cfg.RES2_OUT_CHANNELS,
            dilation=resnet_cfg.RES5_DILATION,
            cfg=config,
        )

        self.pooler = roi_pooler
        self.head = conv5_head

        deconv_cfg = config.MODEL.DECONV
        layerwise = deconv_cfg.LAYERWISE_NORM
        # NOTE: norm_type is not consumed anywhere within this constructor.
        norm_type = deconv_cfg.BOX_NORM_TYPE if layerwise else 'none'
        # Without layer-wise normalization, a single LayerNorm is attached
        # to the module when the box norm type asks for it.
        if not layerwise and deconv_cfg.BOX_NORM_TYPE == 'layernorm':
            self.box_norm = LayerNorm(eps=deconv_cfg.EPS)
    def __init__(self, cfg):
        """Build the classification and box-regression output layers.

        Arguments:
            cfg: configuration node; this constructor reads
                ``MODEL.ROI_BOX_HEAD`` and ``MODEL.DECONV``.
        """
        super(FPNPredictor, self).__init__()
        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        num_classes = box_cfg.NUM_CLASSES
        feat_dim = box_cfg.MLP_HEAD_DIM

        deconv_cfg = cfg.MODEL.DECONV
        layerwise = deconv_cfg.LAYERWISE_NORM
        norm_type = deconv_cfg.BOX_NORM_TYPE if layerwise else 'none'
        # Without layer-wise normalization the layers get norm_type 'none'
        # and a single LayerNorm is attached to the module instead.
        if not layerwise and deconv_cfg.BOX_NORM_TYPE in ('rfnorm', 'layernorm'):
            self.box_norm = LayerNorm(eps=deconv_cfg.EPS)

        if box_cfg.USE_DECONV:
            delinear_kwargs = dict(
                block=deconv_cfg.BLOCK_FC,
                sync=deconv_cfg.SYNC,
                norm_type=norm_type,
            )
            self.cls_score = Delinear(feat_dim, num_classes, **delinear_kwargs)
            self.bbox_pred = Delinear(
                feat_dim, num_classes * 4, **delinear_kwargs)
        else:
            self.cls_score = nn.Linear(feat_dim, num_classes)
            self.bbox_pred = nn.Linear(feat_dim, num_classes * 4)

        # Weights first (classifier std 0.01, regressor std 0.001), then
        # zero both biases.
        for layer, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
            nn.init.normal_(layer.weight, std=std)
        for layer in (self.cls_score, self.bbox_pred):
            nn.init.constant_(layer.bias, 0)
    def __init__(self, cfg, in_channels, num_anchors):
        """
        Build the shared 3x3 layer plus the objectness and box-delta layers.

        Arguments:
            cfg              : config
            in_channels (int): number of channels of the input feature
            num_anchors (int): number of anchors to be predicted
        """
        super(RPNHead, self).__init__()

        deconv_cfg = cfg.MODEL.DECONV
        layerwise = deconv_cfg.LAYERWISE_NORM
        norm_type = deconv_cfg.RPN_NORM_TYPE if layerwise else 'none'
        # Without layer-wise normalization, attach one LayerNorm to the
        # module when the RPN norm type requests it.
        if not layerwise and deconv_cfg.RPN_NORM_TYPE == 'layernorm':
            self.rpn_norm = LayerNorm(eps=deconv_cfg.EPS)

        if cfg.MODEL.RPN.USE_DECONV:
            # Keyword arguments common to all three Deconv layers.
            deconv_kwargs = dict(
                block=deconv_cfg.BLOCK,
                sampling_stride=deconv_cfg.STRIDE,
                sync=deconv_cfg.SYNC,
                norm_type=norm_type,
            )
            self.conv = Deconv(
                in_channels, in_channels,
                kernel_size=3, stride=1, padding=1, **deconv_kwargs)
            self.cls_logits = Deconv(
                in_channels, num_anchors,
                kernel_size=1, stride=1, **deconv_kwargs)
            self.bbox_pred = Deconv(
                in_channels, num_anchors * 4,
                kernel_size=1, stride=1, **deconv_kwargs)
        else:
            self.conv = nn.Conv2d(
                in_channels, in_channels, kernel_size=3, stride=1, padding=1)
            self.cls_logits = nn.Conv2d(
                in_channels, num_anchors, kernel_size=1, stride=1)
            self.bbox_pred = nn.Conv2d(
                in_channels, num_anchors * 4, kernel_size=1, stride=1)

        for layer in (self.conv, self.cls_logits, self.bbox_pred):
            torch.nn.init.normal_(layer.weight, std=0.01)
            torch.nn.init.constant_(layer.bias, 0)
    def __init__(self, cfg):
        """
        Build the mask ROI pooler and the stack of 3x3 mask layers.

        Arguments:
            cfg: configuration node; this constructor reads
                ``MODEL.ROI_MASK_HEAD``, ``MODEL.BACKBONE.OUT_CHANNELS`` and
                ``MODEL.DECONV``.
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        mask_cfg = cfg.MODEL.ROI_MASK_HEAD
        side = mask_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=mask_cfg.POOLER_SCALES,
            sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
        )
        in_features = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.pooler = roi_pooler

        use_gn = mask_cfg.USE_GN
        use_gw = mask_cfg.USE_GW

        layer_widths = mask_cfg.CONV_LAYERS
        dilation = mask_cfg.DILATION
        use_deconv = mask_cfg.USE_DECONV
        deconv_cfg = cfg.MODEL.DECONV
        block = deconv_cfg.BLOCK

        # GN / GW are switched off whenever deconv layers are requested.
        if use_deconv:
            use_gn = use_gw = False

        # Names of the registered mask layers, in creation order.
        self.blocks = []

        layerwise = deconv_cfg.LAYERWISE_NORM
        norm_type = deconv_cfg.MASK_NORM_TYPE if layerwise else 'none'
        if not layerwise and deconv_cfg.MASK_NORM_TYPE == 'layernorm':
            self.mask_norm = LayerNorm(eps=deconv_cfg.EPS)

        for position, out_features in enumerate(layer_widths, 1):
            layer_name = "mask_fcn{}".format(position)
            conv = make_conv3x3(
                in_features,
                out_features,
                dilation=dilation,
                stride=1,
                use_gn=use_gn,
                use_gw=use_gw,
                use_deconv=use_deconv,
                block=block,
                sampling_stride=deconv_cfg.STRIDE,
                sync=deconv_cfg.SYNC,
                norm_type=norm_type,
            )
            self.add_module(layer_name, conv)
            self.blocks.append(layer_name)
            # The next layer consumes what this one produces.
            in_features = out_features
    def __init__(self, cfg):
        """Build the upsampling layer and the per-class mask logits layer.

        Arguments:
            cfg: configuration node; this constructor reads
                ``MODEL.ROI_BOX_HEAD.NUM_CLASSES``, ``MODEL.ROI_MASK_HEAD``,
                ``MODEL.ROI_HEADS.USE_FPN``, ``MODEL.RESNETS`` and
                ``MODEL.DECONV``.
        """
        super(MaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        # With FPN the input width equals the last mask conv width; otherwise
        # it is derived from the res2 width scaled by 2**(stage_index - 1).
        num_inputs = dim_reduced
        if not cfg.MODEL.ROI_HEADS.USE_FPN:
            stage_index = 4
            scale = 2 ** (stage_index - 1)
            num_inputs = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * scale

        if not cfg.MODEL.ROI_MASK_HEAD.USE_DECONV:
            self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
            self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)
        else:
            deconv_cfg = cfg.MODEL.DECONV
            block = deconv_cfg.BLOCK

            layerwise = deconv_cfg.LAYERWISE_NORM
            norm_type = deconv_cfg.MASK_NORM_TYPE if layerwise else 'none'
            if not layerwise and deconv_cfg.MASK_NORM_TYPE == 'layernorm':
                self.mask_norm = LayerNorm(eps=deconv_cfg.EPS)

            self.conv5_mask = DeconvTransposed(
                num_inputs, dim_reduced, 2, 2, 0,
                block=block,
                sampling_stride=deconv_cfg.STRIDE,
                sync=deconv_cfg.SYNC,
                norm_type=norm_type,
            )
            self.mask_fcn_logits = Deconv(
                dim_reduced, num_classes, 1, 1, 0,
                block=block,
                sampling_stride=deconv_cfg.STRIDE,
                sync=deconv_cfg.SYNC,
                norm_type=norm_type,
            )

        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(
                    tensor, mode="fan_out", nonlinearity="relu")
예제 #6
0
    def __init__(self, cfg):
        """Build the box ROI pooler and the two fully connected layers.

        Arguments:
            cfg: configuration node; this constructor reads
                ``MODEL.ROI_BOX_HEAD``, ``MODEL.BACKBONE.OUT_CHANNELS`` and
                ``MODEL.DECONV``.
        """
        super(FPN2MLPFeatureExtractor, self).__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        deconv_cfg = cfg.MODEL.DECONV

        side = box_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=box_cfg.POOLER_SCALES,
            sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
        )
        # Pooled feature map flattened: channels * side * side.
        flat_size = cfg.MODEL.BACKBONE.OUT_CHANNELS * side**2
        hidden_size = box_cfg.MLP_HEAD_DIM
        use_gn = box_cfg.USE_GN
        use_gw = box_cfg.USE_GW

        use_delinear = box_cfg.USE_DECONV
        block = 0
        if use_delinear:
            # GN / GW are switched off when the deconv variant is used.
            use_gn = use_gw = False
            block = deconv_cfg.BLOCK_FC  #check here

        layerwise = deconv_cfg.LAYERWISE_NORM
        norm_type = deconv_cfg.BOX_NORM_TYPE if layerwise else 'none'
        if not layerwise and deconv_cfg.BOX_NORM_TYPE in ('rfnorm', 'layernorm'):
            self.box_norm = LayerNorm(eps=deconv_cfg.EPS)

        self.pooler = roi_pooler
        self.fc6 = make_fc(
            flat_size, hidden_size, use_gn, use_gw, use_delinear,
            block=block, sync=deconv_cfg.SYNC, norm_type=norm_type)
        self.fc7 = make_fc(
            hidden_size, hidden_size, use_gn, use_gw, use_delinear,
            block=block, sync=deconv_cfg.SYNC, norm_type=norm_type)
    def __init__(self, config, pretrained=None):
        """Build the average pool plus classification and box-regression layers.

        Arguments:
            config: configuration node; this constructor reads
                ``MODEL.RESNETS.RES2_OUT_CHANNELS``,
                ``MODEL.ROI_BOX_HEAD`` and ``MODEL.DECONV``.
            pretrained: accepted but not used within this constructor.
        """
        super(FastRCNNPredictor, self).__init__()

        # Input width: res2 width scaled by 2**(stage_index - 1).
        stage_index = 4
        scale = 2 ** (stage_index - 1)
        num_inputs = config.MODEL.RESNETS.RES2_OUT_CHANNELS * scale

        box_cfg = config.MODEL.ROI_BOX_HEAD
        num_classes = box_cfg.NUM_CLASSES
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7)

        deconv_cfg = config.MODEL.DECONV
        layerwise = deconv_cfg.LAYERWISE_NORM
        norm_type = deconv_cfg.BOX_NORM_TYPE if layerwise else 'none'
        if not layerwise and deconv_cfg.BOX_NORM_TYPE in ('rfnorm', 'layernorm'):
            self.box_norm = LayerNorm(eps=deconv_cfg.EPS)

        if box_cfg.USE_DECONV:
            delinear_kwargs = dict(
                block=deconv_cfg.BLOCK_FC,
                sync=deconv_cfg.SYNC,
                norm_type=norm_type,
            )
            self.cls_score = Delinear(num_inputs, num_classes, **delinear_kwargs)
            self.bbox_pred = Delinear(
                num_inputs, num_classes * 4, **delinear_kwargs)
        else:
            self.cls_score = nn.Linear(num_inputs, num_classes)
            self.bbox_pred = nn.Linear(num_inputs, num_classes * 4)

        # Classifier std 0.01, regressor std 0.001; biases start at zero.
        for layer, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
            nn.init.normal_(layer.weight, mean=0, std=std)
            nn.init.constant_(layer.bias, 0)
예제 #8
0
    def __init__(self, cfg):
        """Build the box ROI pooler, the stacked 3x3 convs and the fc6 layer.

        When ``MODEL.ROI_BOX_HEAD.USE_DECONV`` is set, GN / GW are disabled,
        matching the other feature extractors in this file. The ``Deconv``
        layers are always created with ``bias=True`` and are not followed by
        a group norm, so their bias must be zero-initialized even if GN / GW
        is enabled in the config.

        Arguments:
            cfg: configuration node; this constructor reads
                ``MODEL.ROI_BOX_HEAD``, ``MODEL.BACKBONE.OUT_CHANNELS`` and
                ``MODEL.DECONV``.
        """
        super(FPNXconv1fcFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
        use_gw = cfg.MODEL.ROI_BOX_HEAD.USE_GW
        use_deconv = cfg.MODEL.ROI_BOX_HEAD.USE_DECONV
        if use_deconv:
            # Deconv layers carry their own bias and get no group norm, so
            # GN / GW must not suppress the bias initialization below.
            use_gn = False
            use_gw = False
        use_group_norm = use_gn or use_gw

        in_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS
        conv_head_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_HEAD_DIM
        num_stacked_convs = cfg.MODEL.ROI_BOX_HEAD.NUM_STACKED_CONVS
        dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION

        if cfg.MODEL.DECONV.LAYERWISE_NORM:
            norm_type = cfg.MODEL.DECONV.BOX_NORM_TYPE
        else:
            norm_type = 'none'
            if cfg.MODEL.DECONV.BOX_NORM_TYPE == 'layernorm':
                self.box_norm = LayerNorm(eps=cfg.MODEL.DECONV.EPS)

        xconvs = []
        for _ in range(num_stacked_convs):
            if use_deconv:
                xconvs.append(
                    Deconv(in_channels,
                           conv_head_dim,
                           kernel_size=3,
                           stride=1,
                           padding=dilation,
                           dilation=dilation,
                           bias=True,
                           block=cfg.MODEL.DECONV.BLOCK,
                           sampling_stride=cfg.MODEL.DECONV.STRIDE,
                           sync=cfg.MODEL.DECONV.SYNC,
                           norm_type=norm_type))
            else:
                # The conv bias is dropped when a group norm follows it.
                xconvs.append(
                    nn.Conv2d(in_channels,
                              conv_head_dim,
                              kernel_size=3,
                              stride=1,
                              padding=dilation,
                              dilation=dilation,
                              bias=not use_group_norm))
                if use_group_norm:
                    xconvs.append(group_norm(conv_head_dim))
            in_channels = conv_head_dim
            xconvs.append(nn.ReLU(inplace=True))

        self.add_module("xconvs", nn.Sequential(*xconvs))
        for layer in self.xconvs.modules():
            if isinstance(layer, (nn.Conv2d, Deconv)):
                torch.nn.init.normal_(layer.weight, std=0.01)
                # Convs followed by a group norm were built without a bias.
                if not use_group_norm:
                    torch.nn.init.constant_(layer.bias, 0)

        input_size = conv_head_dim * resolution**2
        representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM

        # The FC block size only applies to the deconv ("delinear") variant.
        block = cfg.MODEL.DECONV.BLOCK_FC if use_deconv else 0  #check here

        self.fc6 = make_fc(input_size,
                           representation_size,
                           use_gn=False,
                           use_gw=False,
                           use_delinear=use_deconv,
                           block=block,
                           sync=cfg.MODEL.DECONV.SYNC,
                           norm_type=norm_type)
    def __init__(self, cfg):
        """Build the stem and the ResNet stages selected by ``cfg``.

        Arguments:
            cfg: configuration node; this constructor reads
                ``MODEL.RESNETS``, ``MODEL.BACKBONE`` and ``MODEL.DECONV``.
        """
        super(ResNet, self).__init__()

        # If we want to use the cfg in forward(), then we should make a copy
        # of it and store it for later use:
        # self.cfg = cfg.clone()

        resnet_cfg = cfg.MODEL.RESNETS
        deconv_cfg = cfg.MODEL.DECONV

        # Translate string names to implementations
        stem_module = _STEM_MODULES[resnet_cfg.STEM_FUNC]
        stage_specs = _STAGE_SPECS[cfg.MODEL.BACKBONE.CONV_BODY]
        transformation_module = _TRANSFORMATION_MODULES[resnet_cfg.TRANS_FUNC]

        layerwise = deconv_cfg.LAYERWISE_NORM
        norm_type = deconv_cfg.BOTTLENECK_NORM_TYPE if layerwise else 'none'

        # Deconv-style transformations get their extra settings pre-bound.
        if 'Deconv' in resnet_cfg.TRANS_FUNC:
            transformation_module = functools.partial(
                transformation_module,
                block=deconv_cfg.BLOCK,
                sampling_stride=deconv_cfg.STRIDE,
                sync=deconv_cfg.SYNC,
                norm_type=norm_type,
            )

        # Construct the stem module
        self.stem = stem_module(cfg)

        # Construct the specified ResNet stages
        num_groups = resnet_cfg.NUM_GROUPS
        width_per_group = resnet_cfg.WIDTH_PER_GROUP
        in_channels = resnet_cfg.STEM_OUT_CHANNELS
        stage2_bottleneck_channels = num_groups * width_per_group
        stage2_out_channels = resnet_cfg.RES2_OUT_CHANNELS
        self.stages = []
        self.return_features = {}

        for spec in stage_specs:
            stage_name = "layer" + str(spec.index)
            # Channel widths are scaled by 2**(index - 1) relative to the
            # stage-2 values read above.
            factor = 2 ** (spec.index - 1)
            out_channels = stage2_out_channels * factor
            stage = _make_stage(
                transformation_module,
                in_channels,
                stage2_bottleneck_channels * factor,
                out_channels,
                spec.block_count,
                num_groups,
                resnet_cfg.STRIDE_IN_1X1,
                # Stride 1 for index <= 1, stride 2 for every later stage.
                first_stride=2 if spec.index > 1 else 1,
            )
            in_channels = out_channels
            self.add_module(stage_name, stage)
            self.stages.append(stage_name)
            self.return_features[stage_name] = spec.return_features

        # Optionally freeze (requires_grad=False) parts of the backbone
        self._freeze_backbone(cfg.MODEL.BACKBONE.FREEZE_CONV_BODY_AT)

        # Without layer-wise normalization, attach one LayerNorm per
        # component whose norm type requests it.
        if not layerwise:
            if deconv_cfg.STEM_NORM_TYPE == 'layernorm':
                self.stem_norm = LayerNorm(eps=deconv_cfg.EPS)
            if deconv_cfg.BOTTLENECK_NORM_TYPE == 'layernorm':
                self.bottleneck_norm = LayerNorm(eps=deconv_cfg.EPS)
            if deconv_cfg.FPN_NORM_TYPE == 'layernorm':
                self.fpn_norm = LayerNorm(eps=deconv_cfg.EPS)