Example #1
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        stride=1,
        norm="BN",
        num_branch=3,
        dilations=(1, 2, 3),
        concat_output=False,
        test_branch_idx=-1,
        has_pool=False,
    ):
        """
        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            stride (int): Stride for the first conv.
            norm (str or callable): normalization for all conv layers.
                See :func:`layers.get_norm` for supported format.
        """
        super().__init__(in_channels, out_channels, stride)

        assert num_branch == len(dilations)

        self.num_branch = num_branch
        self.concat_output = concat_output
        self.test_branch_idx = test_branch_idx

        self.has_pool = has_pool
        self.pool_stride = stride
        stride = 1

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        self.conv1 = MRRPConv(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            paddings=dilations,
            bias=False,
            dilations=dilations,
            num_branch=num_branch,
            test_branch_idx=test_branch_idx,
            norm=get_norm(norm, out_channels),
        )

        self.conv2 = MRRPConv(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            paddings=dilations,
            bias=False,
            dilations=dilations,
            num_branch=num_branch,
            test_branch_idx=test_branch_idx,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)

        if self.has_pool:
            self.list_pool = []
            for _ in range(num_branch):
                self.list_pool.append(
                    nn.MaxPool2d(kernel_size=2, stride=self.pool_stride, padding=0)
                )
            self.list_pool = nn.ModuleList(self.list_pool)
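MRRPConv above (like the TridentConv in Examples #14 and #17) applies one shared weight tensor at several dilation rates, one branch per dilation, which is why paddings and dilations receive the same tuple. A minimal sketch of that weight-sharing idea in plain PyTorch; the class name and API here are illustrative, not the actual MRRPConv interface:

import torch
import torch.nn.functional as F

class MultiDilationConv(torch.nn.Module):
    """Apply one shared 3x3 weight at several dilations (one output per branch)."""
    def __init__(self, in_channels, out_channels, dilations=(1, 2, 3)):
        super().__init__()
        self.dilations = dilations
        self.weight = torch.nn.Parameter(
            torch.empty(out_channels, in_channels, 3, 3))
        torch.nn.init.kaiming_normal_(self.weight, mode="fan_out")

    def forward(self, x):
        # padding == dilation keeps the spatial size for a 3x3 kernel
        return [F.conv2d(x, self.weight, padding=d, dilation=d)
                for d in self.dilations]

branches = MultiDilationConv(8, 16)(torch.randn(1, 8, 32, 32))
assert all(b.shape == (1, 16, 32, 32) for b in branches)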
Example #2
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
    ):
        """
        Args:
            norm (str or callable): a callable that takes the number of
                channels and returns an `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
            stride_in_1x1 (bool): when stride==2, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
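The stride_in_1x1 switch above only moves where the downsampling happens; either placement yields the same output shape. A quick self-contained check, using plain torch.nn as a stand-in for the Conv2d wrapper:

import torch
import torch.nn as nn

def bottleneck_convs(stride=2, stride_in_1x1=False):
    s1, s3 = (stride, 1) if stride_in_1x1 else (1, stride)
    return nn.Sequential(
        nn.Conv2d(64, 16, kernel_size=1, stride=s1, bias=False),
        nn.Conv2d(16, 16, kernel_size=3, stride=s3, padding=1, bias=False),
        nn.Conv2d(16, 64, kernel_size=1, bias=False),
    )

x = torch.randn(1, 64, 56, 56)
# Both variants downsample 56x56 -> 28x28; they differ only in which
# conv applies the stride (MSRA: the 1x1, Caffe2 ResNe[X]t: the 3x3).
assert bottleneck_convs(stride_in_1x1=True)(x).shape == (1, 64, 28, 28)
assert bottleneck_convs(stride_in_1x1=False)(x).shape == (1, 64, 28, 28)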
Example #3
    def __init__(
        self,
        input_shape: Dict[str, ShapeSpec],
        *,
        decoder_channels: List[int],
        norm: Union[str, Callable],
        head_channels: int,
        center_loss_weight: float,
        offset_loss_weight: float,
        **kwargs,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (dict[str, ShapeSpec]): shapes of the input features
            decoder_channels (list[int]): a list of output channels of each
                decoder stage. It should have the same length as "in_features"
                (each element in "in_features" corresponds to one decoder stage).
            norm (str or callable): normalization for all conv layers.
            head_channels (int): the output channels of extra convolutions
                between decoder and predictor.
            center_loss_weight (float): loss weight for center point prediction.
            offset_loss_weight (float): loss weight for center offset prediction.
        """
        super().__init__(input_shape,
                         decoder_channels=decoder_channels,
                         norm=norm,
                         **kwargs)
        assert self.decoder_only

        self.center_loss_weight = center_loss_weight
        self.offset_loss_weight = offset_loss_weight
        use_bias = norm == ""
        # center prediction
        # `head` is additional transform before predictor
        self.center_head = nn.Sequential(
            Conv2d(
                decoder_channels[0],
                decoder_channels[0],
                kernel_size=3,
                padding=1,
                bias=use_bias,
                norm=get_norm(norm, decoder_channels[0]),
                activation=F.relu,
            ),
            Conv2d(
                decoder_channels[0],
                head_channels,
                kernel_size=3,
                padding=1,
                bias=use_bias,
                norm=get_norm(norm, head_channels),
                activation=F.relu,
            ),
        )
        weight_init.c2_xavier_fill(self.center_head[0])
        weight_init.c2_xavier_fill(self.center_head[1])
        self.center_predictor = Conv2d(head_channels, 1, kernel_size=1)
        nn.init.normal_(self.center_predictor.weight, 0, 0.001)
        nn.init.constant_(self.center_predictor.bias, 0)

        # offset prediction
        # `head` is additional transform before predictor
        if self.use_depthwise_separable_conv:
            # We use a single 5x5 DepthwiseSeparableConv2d to replace
            # 2 3x3 Conv2d since they have the same receptive field.
            self.offset_head = DepthwiseSeparableConv2d(
                decoder_channels[0],
                head_channels,
                kernel_size=5,
                padding=2,
                norm1=norm,
                activation1=F.relu,
                norm2=norm,
                activation2=F.relu,
            )
        else:
            self.offset_head = nn.Sequential(
                Conv2d(
                    decoder_channels[0],
                    decoder_channels[0],
                    kernel_size=3,
                    padding=1,
                    bias=use_bias,
                    norm=get_norm(norm, decoder_channels[0]),
                    activation=F.relu,
                ),
                Conv2d(
                    decoder_channels[0],
                    head_channels,
                    kernel_size=3,
                    padding=1,
                    bias=use_bias,
                    norm=get_norm(norm, head_channels),
                    activation=F.relu,
                ),
            )
            weight_init.c2_xavier_fill(self.offset_head[0])
            weight_init.c2_xavier_fill(self.offset_head[1])
        self.offset_predictor = Conv2d(head_channels, 2, kernel_size=1)
        nn.init.normal_(self.offset_predictor.weight, 0, 0.001)
        nn.init.constant_(self.offset_predictor.bias, 0)

        self.center_loss = nn.MSELoss(reduction="none")
        self.offset_loss = nn.L1Loss(reduction="none")
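Both losses above are created with reduction="none" so that per-pixel weights can be applied before averaging. A minimal sketch of that weighted reduction; the weighting scheme shown is an illustrative assumption, not the exact Panoptic-DeepLab recipe:

import torch
import torch.nn as nn

center_loss = nn.MSELoss(reduction="none")
pred = torch.rand(2, 1, 64, 64)     # predicted center heatmap
target = torch.rand(2, 1, 64, 64)   # ground-truth heatmap
weights = (target > 0.1).float()    # e.g. supervise only labeled pixels

per_pixel = center_loss(pred, target)          # (2, 1, 64, 64), no reduction
loss = (per_pixel * weights).sum() / weights.sum().clamp(min=1)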
Example #4
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            num_conv: the number of conv layers
            conv_dim: the dimension of the conv layers
            norm: normalization for the conv layers
        """
        super(MaskRCNNConvUpsampleHead, self).__init__()

        # fmt: off
        num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        conv_dims = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
        self.norm = cfg.MODEL.ROI_MASK_HEAD.NORM
        num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV
        input_channels = input_shape.channels
        cls_agnostic_mask = cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK
        # fmt: on

        self.conv_norm_relus = []

        for k in range(num_conv):
            conv = Conv2d(
                input_channels if k == 0 else conv_dims,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("mask_fcn{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
        self.cfg = cfg
        self.deconv = ConvTranspose2d(
            conv_dims if num_conv > 0 else input_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )
        if self.cfg.MODEL.TRANSFER_FUNCTION:
            self.in_feat_dim = 256 * (
                self.cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION *
                2) * (self.cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION * 2)
            self.out_feat_dim = (
                self.cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION *
                2) * (self.cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION * 2)
            self.MLP = nn.Sequential(
                nn.Linear(self.in_feat_dim, 1024),
                nn.LeakyReLU(inplace=True),
                nn.Linear(1024, self.out_feat_dim),
            )

        else:
            self.mask_weights = None
            num_mask_classes = 1 if cls_agnostic_mask else num_classes
            self.predictor = Conv2d(conv_dims,
                                    num_mask_classes,
                                    kernel_size=1,
                                    stride=1,
                                    padding=0)
            # use normal distribution initialization for mask prediction layer
            nn.init.normal_(self.predictor.weight, std=0.001)
            if self.predictor.bias is not None:
                nn.init.constant_(self.predictor.bias, 0)

        for layer in self.conv_norm_relus + [self.deconv]:
            weight_init.c2_msra_fill(layer)
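The ConvTranspose2d used as the deconv above, with kernel_size=2 and stride=2, exactly doubles the spatial size of the mask features, e.g. 14x14 RoI features become 28x28:

import torch
import torch.nn as nn

deconv = nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)
x = torch.randn(8, 256, 14, 14)   # pooled RoI features
assert deconv(x).shape == (8, 256, 28, 28)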
Example #5
    def __init__(self, in_channels, out_channels, *, stride=1, norm="BN"):
        """
        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            stride (int): Stride for the first conv.
            norm (str or callable): normalization for all conv layers.
                See :func:`layers.get_norm` for supported format.
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        self.conv1 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        self.conv2 = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        self.triplet_attention = TripletAttention(in_channels)

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

        for layer in [self.conv1, self.conv2, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
Example #6
def conv_bn(inp, oup, stride):
    return nn.Sequential(
        Conv2d(inp, oup, 3, stride, 1, bias=False, norm=get_norm("BN", oup)),
        nn.ReLU6(inplace=True))
Example #7
    def __init__(
        self,
        input_shape: Dict[str, ShapeSpec],
        *,
        project_channels: List[int],
        aspp_dilations: List[int],
        aspp_dropout: float,
        decoder_channels: List[int],
        common_stride: int,
        norm: Union[str, Callable],
        train_size: Optional[Tuple],
        loss_weight: float = 1.0,
        loss_type: str = "cross_entropy",
        ignore_value: int = -1,
        num_classes: Optional[int] = None,
        use_depthwise_separable_conv: bool = False,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape: shape of the input features. They will be ordered by stride
                and the last one (with largest stride) is used as the input to the
                decoder (i.e. the ASPP module); the rest are low-level features for
                the intermediate levels of decoder.
            project_channels (list[int]): a list of low-level feature channels.
                The length should be len(in_features) - 1.
            aspp_dilations (list[int]): a list of 3 dilations in ASPP.
            aspp_dropout (float): apply dropout on the output of ASPP.
            decoder_channels (list[int]): a list of output channels of each
                decoder stage. It should have the same length as "in_features"
                (each element in "in_features" corresponds to one decoder stage).
            common_stride (int): output stride of decoder.
            norm (str or callable): normalization for all conv layers.
            train_size (tuple): (height, width) of training images.
            loss_weight (float): loss weight.
            loss_type (str): type of loss function, two options:
                (1) "cross_entropy" is the standard cross entropy loss.
                (2) "hard_pixel_mining" is the loss in DeepLab that samples
                    top k% hardest pixels.
            ignore_value (int): category to be ignored during training.
            num_classes (int): number of classes, if set to None, the decoder
                will not construct a predictor.
            use_depthwise_separable_conv (bool): use DepthwiseSeparableConv2d
                in ASPP and decoder.
        """
        super().__init__()
        input_shape = sorted(input_shape.items(), key=lambda x: x[1].stride)

        # fmt: off
        self.in_features      = [k for k, v in input_shape]  # starting from "res2" to "res5"
        in_channels           = [x[1].channels for x in input_shape]
        in_strides            = [x[1].stride for x in input_shape]
        aspp_channels         = decoder_channels[-1]
        self.ignore_value     = ignore_value
        self.common_stride    = common_stride  # output stride
        self.loss_weight      = loss_weight
        self.loss_type        = loss_type
        self.decoder_only     = num_classes is None
        self.use_depthwise_separable_conv = use_depthwise_separable_conv
        # fmt: on

        assert (
            len(project_channels) == len(self.in_features) - 1
        ), "Expected {} project_channels, got {}".format(
            len(self.in_features) - 1, len(project_channels)
        )
        assert len(decoder_channels) == len(
            self.in_features
        ), "Expected {} decoder_channels, got {}".format(
            len(self.in_features), len(decoder_channels)
        )
        self.decoder = nn.ModuleDict()

        use_bias = norm == ""
        for idx, in_channel in enumerate(in_channels):
            decoder_stage = nn.ModuleDict()

            if idx == len(self.in_features) - 1:
                # ASPP module
                if train_size is not None:
                    train_h, train_w = train_size
                    encoder_stride = in_strides[-1]
                    if train_h % encoder_stride or train_w % encoder_stride:
                        raise ValueError("Crop size needs to be divisible by the encoder stride.")
                    pool_h = train_h // encoder_stride
                    pool_w = train_w // encoder_stride
                    pool_kernel_size = (pool_h, pool_w)
                else:
                    pool_kernel_size = None
                project_conv = ASPP(
                    in_channel,
                    aspp_channels,
                    aspp_dilations,
                    norm=norm,
                    activation=F.relu,
                    pool_kernel_size=pool_kernel_size,
                    dropout=aspp_dropout,
                    use_depthwise_separable_conv=use_depthwise_separable_conv,
                )
                fuse_conv = None
            else:
                project_conv = Conv2d(
                    in_channel,
                    project_channels[idx],
                    kernel_size=1,
                    bias=use_bias,
                    norm=get_norm(norm, project_channels[idx]),
                    activation=F.relu,
                )
                weight_init.c2_xavier_fill(project_conv)
                if use_depthwise_separable_conv:
                    # We use a single 5x5 DepthwiseSeparableConv2d to replace
                    # 2 3x3 Conv2d since they have the same receptive field,
                    # proposed in :paper:`Panoptic-DeepLab`.
                    fuse_conv = DepthwiseSeparableConv2d(
                        project_channels[idx] + decoder_channels[idx + 1],
                        decoder_channels[idx],
                        kernel_size=5,
                        padding=2,
                        norm1=norm,
                        activation1=F.relu,
                        norm2=norm,
                        activation2=F.relu,
                    )
                else:
                    fuse_conv = nn.Sequential(
                        Conv2d(
                            project_channels[idx] + decoder_channels[idx + 1],
                            decoder_channels[idx],
                            kernel_size=3,
                            padding=1,
                            bias=use_bias,
                            norm=get_norm(norm, decoder_channels[idx]),
                            activation=F.relu,
                        ),
                        Conv2d(
                            decoder_channels[idx],
                            decoder_channels[idx],
                            kernel_size=3,
                            padding=1,
                            bias=use_bias,
                            norm=get_norm(norm, decoder_channels[idx]),
                            activation=F.relu,
                        ),
                    )
                    weight_init.c2_xavier_fill(fuse_conv[0])
                    weight_init.c2_xavier_fill(fuse_conv[1])

            decoder_stage["project_conv"] = project_conv
            decoder_stage["fuse_conv"] = fuse_conv

            self.decoder[self.in_features[idx]] = decoder_stage

        if not self.decoder_only:
            self.predictor = Conv2d(
                decoder_channels[0], num_classes, kernel_size=1, stride=1, padding=0
            )
            nn.init.normal_(self.predictor.weight, 0, 0.001)
            nn.init.constant_(self.predictor.bias, 0)

            if self.loss_type == "cross_entropy":
                self.loss = nn.CrossEntropyLoss(reduction="mean", ignore_index=self.ignore_value)
            elif self.loss_type == "hard_pixel_mining":
                self.loss = DeepLabCE(ignore_label=self.ignore_value, top_k_percent_pixels=0.2)
            else:
                raise ValueError("Unexpected loss type: %s" % self.loss_type)
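The "hard_pixel_mining" option is the bootstrapped cross-entropy used by DeepLab (DeepLabCE above): average the loss over only the top-k% hardest pixels. A minimal sketch under the same top_k_percent_pixels=0.2 setting; the helper name and shapes are illustrative:

import torch
import torch.nn.functional as F

def hard_pixel_mining_ce(logits, labels, ignore_label=-1, top_k_percent=0.2):
    # per-pixel cross entropy; ignored pixels contribute 0
    pixel_losses = F.cross_entropy(
        logits, labels, ignore_index=ignore_label, reduction="none")
    pixel_losses = pixel_losses.reshape(-1)
    k = max(1, int(top_k_percent * pixel_losses.numel()))
    top_losses, _ = torch.topk(pixel_losses, k)   # keep only the hardest pixels
    return top_losses.mean()

logits = torch.randn(2, 19, 32, 32)          # (N, C, H, W)
labels = torch.randint(0, 19, (2, 32, 32))   # (N, H, W)
loss = hard_pixel_mining_ce(logits, labels)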
Example #8
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
        deform_modulated=False,
        deform_num_groups=1,
        basewidth=26, 
        scale=4,
    ):
        super().__init__(in_channels, out_channels, stride)
        self.deform_modulated = deform_modulated

        if in_channels != out_channels:
            # self.shortcut = Conv2d(
            #     in_channels,
            #     out_channels,
            #     kernel_size=1,
            #     stride=stride,
            #     bias=False,
            #     norm=get_norm(norm, out_channels),
            # )
            self.shortcut = nn.Sequential(
                nn.AvgPool2d(kernel_size=stride, stride=stride, 
                    ceil_mode=True, count_include_pad=False),
                Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=1,
                    bias=False,
                    norm=get_norm(norm, out_channels),
                )
            )
        else:
            self.shortcut = None

        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)
        width = bottleneck_channels // scale

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        if scale == 1:
            self.nums = 1
        else:
            self.nums = scale - 1
        if self.in_channels != self.out_channels and stride_3x3 != 2:
            self.pool = nn.AvgPool2d(kernel_size=3, stride=stride_3x3, padding=1)

        if deform_modulated:
            deform_conv_op = ModulatedDeformConv
            # offset channels are 2 (or 3 if modulated) * kernel_size * kernel_size
            offset_channels = 27
        else:
            deform_conv_op = DeformConv
            offset_channels = 18

        # self.conv2_offset = Conv2d(
        #     bottleneck_channels,
        #     offset_channels * deform_num_groups,
        #     kernel_size=3,
        #     stride=stride_3x3,
        #     padding=1 * dilation,
        #     dilation=dilation,
        # )
        # self.conv2 = deform_conv_op(
        #     bottleneck_channels,
        #     bottleneck_channels,
        #     kernel_size=3,
        #     stride=stride_3x3,
        #     padding=1 * dilation,
        #     bias=False,
        #     groups=num_groups,
        #     dilation=dilation,
        #     deformable_groups=deform_num_groups,
        #     norm=get_norm(norm, bottleneck_channels),
        # )

        conv2_offsets = []
        convs = []
        bns = []
        for i in range(self.nums):
            conv2_offsets.append(Conv2d(
                            width, 
                            offset_channels * deform_num_groups, 
                            kernel_size=3, 
                            stride=stride_3x3, 
                            padding=1 * dilation, 
                            bias=False,
                            groups=num_groups,
                            dilation=dilation,
                            ))
            convs.append(deform_conv_op(
                            width, 
                            width, 
                            kernel_size=3, 
                            stride=stride_3x3, 
                            padding=1 * dilation, 
                            bias=False,
                            groups=num_groups,
                            dilation=dilation,
                            deformable_groups=deform_num_groups,
                            ))
            bns.append(get_norm(norm, width))
        self.conv2_offsets = nn.ModuleList(conv2_offsets)
        self.convs = nn.ModuleList(convs)
        self.bns = nn.ModuleList(bns)

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
        self.scale = scale
        self.width = width
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride_3x3 = stride_3x3
        # for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        #     if layer is not None:  # shortcut can be None
        #         weight_init.c2_msra_fill(layer)

        # nn.init.constant_(self.conv2_offset.weight, 0)
        # nn.init.constant_(self.conv2_offset.bias, 0)
        for layer in [self.conv1, self.conv3]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
        if self.shortcut is not None:
            for layer in self.shortcut.modules():
                if isinstance(layer, Conv2d):
                    weight_init.c2_msra_fill(layer)
                
        for layer in self.convs:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)

        for layer in self.conv2_offsets:
            if layer.weight is not None:
                nn.init.constant_(layer.weight, 0)
            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0)
Example #9
    def __init__(self,
                 in_channels,
                 channels,
                 kernel_size,
                 stride=(1, 1),
                 padding=(0, 0),
                 dilation=(1, 1),
                 groups=1,
                 bias=True,
                 radix=2,
                 reduction_factor=4,
                 rectify=False,
                 rectify_avg=False,
                 norm=None,
                 dropblock_prob=0.0,
                 **kwargs):
        super(SplAtConv2d, self).__init__()
        padding = _pair(padding)
        self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
        self.rectify_avg = rectify_avg
        inter_channels = max(in_channels * radix // reduction_factor, 32)
        self.radix = radix
        self.cardinality = groups
        self.channels = channels
        self.dropblock_prob = dropblock_prob
        if self.rectify:
            from rfconv import RFConv2d
            self.conv = RFConv2d(in_channels,
                                 channels * radix,
                                 kernel_size,
                                 stride,
                                 padding,
                                 dilation,
                                 groups=groups * radix,
                                 bias=bias,
                                 average_mode=rectify_avg,
                                 **kwargs)
        else:
            self.conv = Conv2d(in_channels,
                               channels * radix,
                               kernel_size,
                               stride,
                               padding,
                               dilation,
                               groups=groups * radix,
                               bias=bias,
                               **kwargs)
        self.use_bn = norm is not None
        if self.use_bn:
            self.bn0 = get_norm(norm, channels * radix)
        self.relu = ReLU(inplace=True)
        self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality)
        if self.use_bn:
            self.bn1 = get_norm(norm, inter_channels)
        self.fc2 = Conv2d(inter_channels,
                          channels * radix,
                          1,
                          groups=self.cardinality)
        if dropblock_prob > 0.0:
            self.dropblock = DropBlock2D(dropblock_prob, 3)
        self.rsoftmax = rSoftMax(radix, groups)
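The rSoftMax at the end computes ResNeSt's split-attention weights: reshape the gate logits to (batch, cardinality, radix, -1) and softmax across the radix splits, falling back to a sigmoid when radix == 1. A self-contained sketch consistent with the reference ResNeSt implementation:

import torch

class rSoftMax(torch.nn.Module):
    def __init__(self, radix, cardinality):
        super().__init__()
        self.radix = radix
        self.cardinality = cardinality

    def forward(self, x):                       # x: (batch, channels*radix, 1, 1)
        batch = x.size(0)
        if self.radix > 1:
            x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2)
            x = torch.softmax(x, dim=1)         # softmax across the radix splits
            return x.reshape(batch, -1)
        return torch.sigmoid(x)

atten = rSoftMax(radix=2, cardinality=1)(torch.randn(4, 128, 1, 1))
assert atten.shape == (4, 128)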
Example #10
    def __init__(self, in_channels_list, out_channels, norm=""):
        """
        Args:
            bottom_up (Backbone): module representing the bottom up subnetwork.
                Must be a subclass of :class:`Backbone`. The multi-scale feature
                maps generated by the bottom up network, and listed in `in_features`,
                are used to generate FPN levels.
            in_features (list[str]): names of the input feature maps coming
                from the backbone to which FPN is attached. For example, if the
                backbone produces ["res2", "res3", "res4"], any *contiguous* sublist
                of these may be used; order must be from high to low resolution.
            out_channels (int): number of channels in the output feature maps.
            norm (str): the normalization to use.
        """
        super(SingleBiFPN, self).__init__()

        self.out_channels = out_channels
        # build 5-levels bifpn
        if len(in_channels_list) == 5:
            self.nodes = [
                {
                    'feat_level': 3,
                    'inputs_offsets': [3, 4]
                },
                {
                    'feat_level': 2,
                    'inputs_offsets': [2, 5]
                },
                {
                    'feat_level': 1,
                    'inputs_offsets': [1, 6]
                },
                {
                    'feat_level': 0,
                    'inputs_offsets': [0, 7]
                },
                {
                    'feat_level': 1,
                    'inputs_offsets': [1, 7, 8]
                },
                {
                    'feat_level': 2,
                    'inputs_offsets': [2, 6, 9]
                },
                {
                    'feat_level': 3,
                    'inputs_offsets': [3, 5, 10]
                },
                {
                    'feat_level': 4,
                    'inputs_offsets': [4, 11]
                },
            ]
        elif len(in_channels_list) == 3:
            self.nodes = [
                {
                    'feat_level': 1,
                    'inputs_offsets': [1, 2]
                },
                {
                    'feat_level': 0,
                    'inputs_offsets': [0, 3]
                },
                {
                    'feat_level': 1,
                    'inputs_offsets': [1, 3, 4]
                },
                {
                    'feat_level': 2,
                    'inputs_offsets': [2, 5]
                },
            ]
        else:
            raise NotImplementedError

        node_info = list(in_channels_list)

        num_output_connections = [0 for _ in in_channels_list]
        for fnode in self.nodes:
            feat_level = fnode["feat_level"]
            inputs_offsets = fnode["inputs_offsets"]
            inputs_offsets_str = "_".join(map(str, inputs_offsets))
            for input_offset in inputs_offsets:
                num_output_connections[input_offset] += 1

                in_channels = node_info[input_offset]
                if in_channels != out_channels:
                    lateral_conv = Conv2d(in_channels,
                                          out_channels,
                                          kernel_size=1,
                                          norm=get_norm(norm, out_channels))
                    self.add_module(
                        "lateral_{}_f{}".format(input_offset, feat_level),
                        lateral_conv)
            node_info.append(out_channels)
            num_output_connections.append(0)

            # generate attention weights
            name = "weights_f{}_{}".format(feat_level, inputs_offsets_str)
            self.__setattr__(
                name,
                nn.Parameter(torch.ones(len(inputs_offsets),
                                        dtype=torch.float32),
                             requires_grad=True))

            # generate convolutions after combination
            name = "outputs_f{}_{}".format(feat_level, inputs_offsets_str)
            self.add_module(
                name,
                Conv2d(out_channels,
                       out_channels,
                       kernel_size=3,
                       padding=1,
                       norm=get_norm(norm, out_channels),
                       bias=(norm == "")))
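At runtime the per-node weights registered above drive the BiFPN's fast normalized fusion: inputs are combined with ReLU-ed weights normalized to sum to one. A minimal sketch of that fusion step (EfficientDet's "fast attention"; the epsilon is an assumed value):

import torch

def fast_normalized_fusion(inputs, weights, eps=1e-4):
    """Combine same-shaped feature maps with learnable, normalized weights."""
    w = torch.relu(weights)        # keep the fusion weights non-negative
    w = w / (w.sum() + eps)        # normalize so the weights sum to ~1
    return sum(wi * x for wi, x in zip(w, inputs))

feats = [torch.randn(1, 64, 32, 32) for _ in range(2)]
weights = torch.nn.Parameter(torch.ones(2))   # as registered per node above
fused = fast_normalized_fusion(feats, weights)
assert fused.shape == (1, 64, 32, 32)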
Example #11
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
        basewidth=26, 
        scale=4,
    ):
        """
        Args:
            bottleneck_channels (int): number of output channels for the 3x3
                "bottleneck" conv layers.
            num_groups (int): number of groups for the 3x3 conv layer.
            norm (str or callable): normalization for all conv layers.
                See :func:`layers.get_norm` for supported format.
            stride_in_1x1 (bool): when stride>1, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
            dilation (int): the dilation rate of the 3x3 conv layer.
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.AvgPool2d(kernel_size=stride, stride=stride, 
                    ceil_mode=True, count_include_pad=False),
                Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=1,
                    bias=False,
                    norm=get_norm(norm, out_channels),
                )
            )
        else:
            self.shortcut = None

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)
        width = bottleneck_channels // scale

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )
        if scale == 1:
            self.nums = 1
        else:
            self.nums = scale - 1
        if self.in_channels != self.out_channels and stride_3x3 != 2:
            self.pool = nn.AvgPool2d(kernel_size=3, stride=stride_3x3, padding=1)

        convs = []
        bns = []
        for i in range(self.nums):
            convs.append(nn.Conv2d(
                            width, 
                            width, 
                            kernel_size=3, 
                            stride=stride_3x3, 
                            padding=1 * dilation, 
                            bias=False,
                            groups=num_groups,
                            dilation=dilation,
                            ))
            bns.append(get_norm(norm, width))
        self.convs = nn.ModuleList(convs)
        self.bns = nn.ModuleList(bns)

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
        self.scale = scale
        self.width = width
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride_3x3 = stride_3x3
        for layer in [self.conv1, self.conv3]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
        if self.shortcut is not None:
            for layer in self.shortcut.modules():
                if isinstance(layer, Conv2d):
                    weight_init.c2_msra_fill(layer)
                
        for layer in self.convs:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
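In the forward pass of this Res2Net-style block, the conv1 output is split into `scale` groups of `width` channels, and each conv in self.convs processes its own split plus the previous branch's output before everything is concatenated again. A minimal sketch of that hierarchical flow, assuming stride 1 and omitting the norm layers and pooling branch:

import torch
import torch.nn as nn

scale, width = 4, 16
convs = nn.ModuleList(
    nn.Conv2d(width, width, kernel_size=3, padding=1, bias=False)
    for _ in range(scale - 1))

x = torch.randn(1, scale * width, 32, 32)   # output of the 1x1 conv1
splits = torch.split(x, width, dim=1)       # `scale` groups of `width` channels
outs = []
sp = None
for i, conv in enumerate(convs):
    # each branch sees its own split plus the previous branch's output
    sp = splits[i] if i == 0 else sp + splits[i]
    sp = conv(sp)
    outs.append(sp)
outs.append(splits[-1])                      # the last split passes through
y = torch.cat(outs, dim=1)
assert y.shape == x.shape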
Example #12
    def __init__(
        self,
        in_channels,
        out_channels,
        with_modulated_dcn=True,
        kernel_size=3,
        stride=1,
        groups=1,
        dilation=1,
        deformable_groups=1,
        bias=False,
        padding=None
    ):
        super(DFConv2d, self).__init__()
        if isinstance(kernel_size, (list, tuple)):
            assert isinstance(stride, (list, tuple))
            assert isinstance(dilation, (list, tuple))
            assert len(kernel_size) == 2
            assert len(stride) == 2
            assert len(dilation) == 2
            padding = (
                dilation[0] * (kernel_size[0] - 1) // 2,
                dilation[1] * (kernel_size[1] - 1) // 2
            )
            offset_base_channels = kernel_size[0] * kernel_size[1]
        else:
            padding = dilation * (kernel_size - 1) // 2
            offset_base_channels = kernel_size * kernel_size
        if with_modulated_dcn:
            from .deform_conv import ModulatedDeformConv
            offset_channels = offset_base_channels * 3  # default: 27
            conv_block = ModulatedDeformConv
        else:
            from .deform_conv import DeformConv
            offset_channels = offset_base_channels * 2  # default: 18
            conv_block = DeformConv
        self.offset = Conv2d(
            in_channels,
            deformable_groups * offset_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=1,
            dilation=dilation
        )
        for l in [self.offset, ]:
            nn.init.kaiming_uniform_(l.weight, a=1)
            torch.nn.init.constant_(l.bias, 0.)
        self.conv = conv_block(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            deformable_groups=deformable_groups,
            bias=bias
        )
        self.with_modulated_dcn = with_modulated_dcn
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.offset_split = offset_base_channels * deformable_groups * 2
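The 2x/3x multipliers above follow the deformable-conv offset layout: every one of the k*k sampling locations needs a (dy, dx) offset pair, plus one modulation scalar in the modulated variant, giving 18 or 27 channels for a 3x3 kernel. The same layout can be checked against torchvision's DeformConv2d:

import torch
from torchvision.ops import DeformConv2d

dcn = DeformConv2d(32, 64, kernel_size=3, padding=1)
x = torch.randn(1, 32, 28, 28)
# offsets: 2 values (dy, dx) per kernel position -> 2 * 3 * 3 = 18 channels
offset = torch.zeros(1, 18, 28, 28)   # all-zero offsets: behaves like a plain conv
out = dcn(x, offset)
assert out.shape == (1, 64, 28, 28)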
Example #13
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
        super().__init__()

        # fmt: off
        self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES
        feature_strides = {k: v.stride for k, v in input_shape.items()}
        feature_channels = {k: v.channels for k, v in input_shape.items()}
        self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE
        num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES
        conv_dims = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM
        self.common_stride = cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE
        norm = cfg.MODEL.SEM_SEG_HEAD.NORM
        # fmt: on

        self.scale_pam_heads = []
        for in_feature in self.in_features:
            head_ops = []
            head_length = max(
                1,
                int(
                    np.log2(feature_strides[in_feature]) -
                    np.log2(self.common_stride)))
            for k in range(head_length):
                norm_module = nn.GroupNorm(32,
                                           conv_dims) if norm == "GN" else None
                conv = Conv2d(
                    feature_channels[in_feature] if k == 0 else conv_dims,
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,
                    norm=norm_module,
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                head_ops.append(conv)
                if feature_strides[in_feature] != self.common_stride:
                    head_ops.append(
                        nn.Upsample(scale_factor=2,
                                    mode="bilinear",
                                    align_corners=False))
            self.scale_pam_heads.append(nn.Sequential(*head_ops))
            self.add_module(in_feature + '_pam', self.scale_pam_heads[-1])

        self.scale_cam_heads = []
        for in_feature in self.in_features:
            head_ops = []
            head_length = max(
                1,
                int(
                    np.log2(feature_strides[in_feature]) -
                    np.log2(self.common_stride)))
            for k in range(head_length):
                norm_module = nn.GroupNorm(32,
                                           conv_dims) if norm == "GN" else None
                conv = Conv2d(
                    feature_channels[in_feature] if k == 0 else conv_dims,
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,
                    norm=norm_module,
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                head_ops.append(conv)
                if feature_strides[in_feature] != self.common_stride:
                    head_ops.append(
                        nn.Upsample(scale_factor=2,
                                    mode="bilinear",
                                    align_corners=False))
            self.scale_cam_heads.append(nn.Sequential(*head_ops))
            self.add_module(in_feature + '_cam', self.scale_cam_heads[-1])

        self.predictor = Conv2d(conv_dims,
                                num_classes,
                                kernel_size=1,
                                stride=1,
                                padding=0)
        weight_init.c2_msra_fill(self.predictor)
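The head_length formula above simply counts how many 2x upsampling stages bring a feature stride down to common_stride, with a minimum of one conv stage:

import numpy as np

common_stride = 4
for stride in (4, 8, 16, 32):
    head_length = max(1, int(np.log2(stride) - np.log2(common_stride)))
    print(stride, "->", head_length)   # 4 -> 1, 8 -> 1, 16 -> 2, 32 -> 3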
Example #14
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        num_branch=3,
        dilations=(1, 2, 3),
        concat_output=False,
        test_branch_idx=-1,
        has_pool=False,
    ):
        """
        Args:
            bottleneck_channels (int): number of output channels for the 3x3
                "bottleneck" conv layers.
            num_groups (int): number of groups for the 3x3 conv layer.
            norm (str or callable): normalization for all conv layers.
                See :func:`layers.get_norm` for supported format.
            stride_in_1x1 (bool): when stride>1, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
            dilation (int): the dilation rate of the 3x3 conv layer.
        """
        super().__init__(in_channels, out_channels, stride)

        assert num_branch == len(dilations)

        self.num_branch = num_branch
        self.concat_output = concat_output
        self.test_branch_idx = test_branch_idx

        self.has_pool = has_pool
        self.pool_stride = stride
        stride = 1

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = MRRPConv(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            paddings=dilations,
            bias=False,
            groups=num_groups,
            dilations=dilations,
            num_branch=num_branch,
            test_branch_idx=test_branch_idx,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)

        # Zero-initialize the last normalization in each residual branch,
        # so that at the beginning, the residual branch starts with zeros,
        # and each residual block behaves like an identity.
        # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour":
        # "For BN layers, the learnable scaling coefficient γ is initialized
        # to be 1, except for each residual block's last BN
        # where γ is initialized to be 0."

        # nn.init.constant_(self.conv3.norm.weight, 0)
        # TODO this somehow hurts performance when training GN models from scratch.
        # Add it as an option when we need to use this code to train a backbone.

        if self.has_pool:
            self.list_pool = []
            for _ in range(num_branch):
                self.list_pool.append(
                    nn.MaxPool2d(kernel_size=2, stride=self.pool_stride, padding=0)
                )
            self.list_pool = nn.ModuleList(self.list_pool)
Example #15
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
    ):
        """
        Similar to :class:`BottleneckBlock`, but with deformable conv in the 3x3 convolution.
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        deform_conv_op = DefeConv
        offset_channels = 2 * bottleneck_channels

        self.L = 1

        self.conv2_offset_a = Conv2d(
            bottleneck_channels,
            offset_channels,
            kernel_size=3,
            stride=1,
            padding=1 * dilation,
            dilation=dilation,
        )
        self.conv2_offset_b = Conv2d(
            bottleneck_channels,
            offset_channels,
            kernel_size=3,
            stride=1,
            padding=1 * dilation,
            dilation=dilation,
        )

        self.conv2 = deform_conv_op(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)

        nn.init.constant_(self.conv2_offset_a.weight, 0)
        nn.init.constant_(self.conv2_offset_a.bias, 0)
        nn.init.constant_(self.conv2_offset_b.weight, 0)
        nn.init.constant_(self.conv2_offset_b.bias, 0)
Example #16
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            num_conv: the number of conv layers
            conv_dim: the dimension of the conv layers
            norm: normalization for the conv layers
        """
        super(Parallel_Amodal_Visible_Head, self).__init__()

        # fmt: off
        self.cfg = cfg
        num_classes       = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        conv_dims         = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
        self.norm         = cfg.MODEL.ROI_MASK_HEAD.NORM
        num_conv          = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV
        # num_vis_conv      = cfg.MODEL.ROI_MASK_HEAD.NUM_VIS_CONV
        self.fm           = cfg.MODEL.ROI_MASK_HEAD.AMODAL_FEATURE_MATCHING
        self.fm_beta      = cfg.MODEL.ROI_MASK_HEAD.AMODAL_FM_BETA
        input_channels    = input_shape.channels
        cls_agnostic_mask = cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK
        self.SPRef        = cfg.MODEL.ROI_MASK_HEAD.RECON_NET.MEMORY_REFINE
        self.SPk          = cfg.MODEL.ROI_MASK_HEAD.RECON_NET.MEMORY_REFINE_K
        self.version      = cfg.MODEL.ROI_MASK_HEAD.VERSION
        self._output_size = (input_shape.channels, input_shape.height, input_shape.width)
        self.attention_mode = cfg.MODEL.ROI_MASK_HEAD.ATTENTION_MODE
        # fmt: on

        self.amodal_conv_norm_relus = []
        self.visible_conv_norm_relus = []
        for k in range(num_conv):
            a_conv = Conv2d(
                input_channels if k == 0 else conv_dims,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("amodal_mask_fcn{}".format(k + 1), a_conv)
            self.amodal_conv_norm_relus.append(a_conv)

            v_conv = Conv2d(
                input_channels if k == 0 else conv_dims,
                conv_dims,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=not self.norm,
                norm=get_norm(self.norm, conv_dims),
                activation=F.relu,
            )
            self.add_module("visible_mask_fcn{}".format(k + 1), v_conv)
            self.visible_conv_norm_relus.append(v_conv)

        self.amodal_deconv = ConvTranspose2d(
            conv_dims if num_conv > 0 else input_channels,
            conv_dims,
            kernel_size=2,
            stride=2,
            padding=0,
        )

        self.visible_deconv = ConvTranspose2d(
                conv_dims if num_conv > 0 else input_channels,
                conv_dims,
                kernel_size=2,
                stride=2,
                padding=0,
            )

        num_mask_classes = 1 if cls_agnostic_mask else num_classes
        self.amodal_predictor = Conv2d(conv_dims, num_mask_classes, kernel_size=1, stride=1, padding=0)
        self.visible_predictor = Conv2d(conv_dims, num_mask_classes, kernel_size=1, stride=1, padding=0)

        nn.init.normal_(self.amodal_predictor.weight, std=0.001)
        if self.amodal_predictor.bias is not None:
            nn.init.constant_(self.amodal_predictor.bias, 0)
        # use normal distribution initialization for mask prediction layer
        nn.init.normal_(self.visible_predictor.weight, std=0.001)
        if self.visible_predictor.bias is not None:
            nn.init.constant_(self.visible_predictor.bias, 0)

        for layer in self.amodal_conv_norm_relus + [self.amodal_deconv] + self.visible_conv_norm_relus + [self.visible_deconv]:
            weight_init.c2_msra_fill(layer)
        # use normal distribution initialization for mask prediction layer
        # self.amodal_pool = nn.MaxPool2d(kernel_size=2)
        self.amodal_pool = nn.AvgPool2d(kernel_size=2)
        self.visible_pool = nn.AvgPool2d(kernel_size=2)

        if self.SPRef:
            self.fuse_layer = Conv2d(
                input_channels + self.cfg.MODEL.ROI_MASK_HEAD.RECON_NET.MEMORY_REFINE_K,
                input_channels,
                kernel_size=3,
                stride=1,
                padding=1
            )
Example #17
    def __init__(
            self,
            in_channels,
            out_channels,
            *,
            bottleneck_channels,
            stride=1,
            num_groups=1,
            norm="BN",
            stride_in_1x1=False,
            num_branch=3,
            dilations=(1, 2, 3),
            concat_output=False,
            test_branch_idx=-1,
    ):
        """
        Args:
            num_branch (int): the number of branches in TridentNet.
            dilations (tuple): the dilations of multiple branches in TridentNet.
            concat_output (bool): whether to concatenate the outputs of the
                multiple branches in TridentNet. Use 'True' for the last
                trident block.
        """
        super().__init__(in_channels, out_channels, stride)

        assert num_branch == len(dilations)

        self.num_branch = num_branch
        self.concat_output = concat_output
        self.test_branch_idx = test_branch_idx

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = TridentConv(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            paddings=dilations,
            bias=False,
            groups=num_groups,
            dilations=dilations,
            num_branch=num_branch,
            test_branch_idx=test_branch_idx,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
Example #18
def conv_bn(inp, oup, stride, width_mult=1.0):
    inp = 3 if inp == 3 else scale_chan(inp, width_mult)
    oup = scale_chan(oup, width_mult)
    conv = Conv2d(inp, oup, 3, stride, 1, bias=False, norm=get_norm("BN", oup))
    weight_init.c2_msra_fill(conv)
    return nn.Sequential(conv, nn.ReLU(inplace=True))
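scale_chan is not shown in this snippet; width-multiplier helpers like it typically scale the channel count and round to a hardware-friendly multiple, as in MobileNet's make_divisible. A hypothetical implementation, an assumption rather than the actual helper:

def scale_chan(channels, width_mult, divisor=8):
    """Hypothetical width-multiplier rounding (MobileNet-style make_divisible)."""
    scaled = channels * width_mult
    rounded = max(divisor, int(scaled + divisor / 2) // divisor * divisor)
    if rounded < 0.9 * scaled:   # never round down by more than 10%
        rounded += divisor
    return rounded

assert scale_chan(32, 0.5) == 16
assert scale_chan(48, 0.5) == 24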
Example #19
def conv_1x1_bn(inp, oup):
    return nn.Sequential(
        Conv2d(inp, oup, 1, 1, 0, bias=False, norm=get_norm("BN", oup)),
        nn.ReLU6(inplace=True))
Example #20
    def __init__(self, cfg, input_shape: ShapeSpec):
        super().__init__()

        # conv_dims should be half of the original configuration, because the
        # channel dim is latently doubled inside GatedConv2d
        conv_dims = cfg.MODEL.INPAINTER.GENERATOR.CONV_DIMS
        in_channels = input_shape.channels

        # TODO: This is ugly
        # stage 1: coarse network
        self.coarse_network = nn.Sequential(
            GatedConv2d(in_channels,
                        conv_dims,
                        kernel_size=5,
                        padding=2,
                        activation=F.elu_),
            GatedConv2d(conv_dims,
                        2 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        activation=F.elu_),  # /2 x
            GatedConv2d(2 * conv_dims,
                        2 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedConv2d(2 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        activation=F.elu_),  # /4 x
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),  # 5
            # Dilated Gated Conv Start
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=2,
                        dilation=2,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=4,
                        dilation=4,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=8,
                        dilation=8,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=16,
                        dilation=16,
                        activation=F.elu_),  # 9
            # Dilated Gated Conv End
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),  # 11
            GatedDeConv2d(4 * conv_dims,
                          2 * conv_dims,
                          kernel_size=3,
                          padding=1,
                          activation=F.elu_),  # /2 x
            GatedConv2d(2 * conv_dims,
                        2 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),  # 13
            GatedDeConv2d(2 * conv_dims,
                          conv_dims,
                          kernel_size=3,
                          padding=1,
                          activation=F.elu_),  # 1x
            GatedConv2d(conv_dims,
                        conv_dims // 2,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            Conv2d(conv_dims // 2,
                   3,
                   kernel_size=3,
                   padding=1,
                   activation=F.tanh),  # the output layer is a plain Conv2d
        )

        # stage 2 branch a: with contextual attention module
        self.refinement_ctx_branch_1 = nn.Sequential(
            GatedConv2d(3,
                        conv_dims,
                        kernel_size=5,
                        padding=2,
                        activation=F.elu_),
            GatedConv2d(conv_dims,
                        conv_dims,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        activation=F.elu_),  # /2x
            GatedConv2d(conv_dims,
                        2 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedConv2d(2 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        activation=F.elu_),  # /4x
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.relu_),  # NOTE: the activation is relu
        )
        self.ctx_module = ContextAttention(kernel_size=3,
                                           stride=2,
                                           fuse_size=3)  # ctx module
        self.refinement_ctx_branch_2 = nn.Sequential(
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
        )

        # stage 2: branch b: dilated gated conv
        self.refinement_conv_branch = nn.Sequential(
            GatedConv2d(3,
                        conv_dims,
                        kernel_size=5,
                        padding=2,
                        activation=F.elu_),
            GatedConv2d(conv_dims,
                        conv_dims,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        activation=F.elu_),  # /2x
            GatedConv2d(conv_dims,
                        2 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedConv2d(2 * conv_dims,
                        2 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        activation=F.elu_),  # /4x
            GatedConv2d(2 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),  # 5
            # Dilated Gated Conv Start
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=2,
                        dilation=2,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=4,
                        dilation=4,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=8,
                        dilation=8,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=16,
                        dilation=16,
                        activation=F.elu_),
            # Dilated Gated Conv End
        )

        # stage 2: refinement decoder
        self.refinement_decoder = nn.Sequential(
            GatedConv2d(8 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedConv2d(4 * conv_dims,
                        4 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedDeConv2d(4 * conv_dims,
                          2 * conv_dims,
                          kernel_size=3,
                          padding=1,
                          activation=F.elu_),  # /2x
            GatedConv2d(2 * conv_dims,
                        2 * conv_dims,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            GatedDeConv2d(2 * conv_dims,
                          conv_dims,
                          kernel_size=3,
                          padding=1,
                          activation=F.elu_),  # /1x
            GatedConv2d(conv_dims,
                        conv_dims // 2,
                        kernel_size=3,
                        padding=1,
                        activation=F.elu_),
            Conv2d(conv_dims // 2,
                   3,
                   kernel_size=3,
                   padding=1,
                   activation=F.tanh),  # the output layer is a plain Conv2d
        )
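The forward pass is not shown, but the channel counts above pin down the data flow: the coarse network maps the input to a 3-channel image, both refinement branches consume that 3-channel image and emit 4 * conv_dims features, and the decoder's first layer takes 8 * conv_dims, i.e. the two branches concatenated. A plausible wiring, sketched under those assumptions (the method name and the ContextAttention call signature are assumptions, not the author's code):

def _sketch_forward(self, x, mask):
    coarse = self.coarse_network(x)              # (N, 3, H, W)
    ctx = self.refinement_ctx_branch_1(coarse)   # (N, 4*conv_dims, H/4, W/4)
    ctx = self.ctx_module(ctx, ctx, mask)        # call signature assumed
    ctx = self.refinement_ctx_branch_2(ctx)
    conv = self.refinement_conv_branch(coarse)   # (N, 4*conv_dims, H/4, W/4)
    # Decoder input is 8*conv_dims: the two branches concatenated.
    refined = self.refinement_decoder(torch.cat([ctx, conv], dim=1))
    return coarse, refined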
Exemple #21
0
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
    ):
        """
        Args:
            bottleneck_channels (int): number of output channels for the 3x3
                "bottleneck" conv layers.
            num_groups (int): number of groups for the 3x3 conv layer.
            norm (str or callable): normalization for all conv layers.
                See :func:`layers.get_norm` for supported format.
            stride_in_1x1 (bool): when stride>1, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
            dilation (int): the dilation rate of the 3x3 conv layer.
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
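The forward for this block is defined elsewhere on the class; the standard detectron2 bottleneck forward, which this __init__ matches, looks like:

def forward(self, x):
    # 1x1 reduce -> 3x3 -> 1x1 expand, with an identity or projected shortcut.
    out = F.relu_(self.conv1(x))
    out = F.relu_(self.conv2(out))
    out = self.conv3(out)
    shortcut = self.shortcut(x) if self.shortcut is not None else x
    return F.relu_(out + shortcut)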
Exemple #22
0
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        The following attributes are parsed from config:
            num_conv, num_fc: the number of conv/fc layers
            conv_dim/fc_dim: the dimension of the conv/fc layers
            norm: normalization for the conv layers
        """
        super().__init__(cfg, input_shape)

        # TODO: move to layers.py?
        def make_fc(dim_in, hidden_dim, use_gn=False):
            '''
                Caffe2 implementation uses XavierFill, which in fact
                corresponds to kaiming_uniform_ in PyTorch
            '''
            assert not use_gn
            # if use_gn:
            #     fc = nn.Linear(dim_in, hidden_dim, bias=False)
            #     nn.init.kaiming_uniform_(fc.weight, a=1)
            #     return nn.Sequential(fc, group_norm(hidden_dim))
            fc = nn.Linear(dim_in, hidden_dim)
            nn.init.kaiming_uniform_(fc.weight, a=1)
            nn.init.constant_(fc.bias, 0)
            return fc

        # fmt: off
        fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM
        norm = cfg.MODEL.ROI_BOX_HEAD.NORM

        self.embed_dim = cfg.MODEL.SPATIOTEMPORAL.ROI_BOX_HEAD.ATTENTION.EMBED_DIM
        self.groups = cfg.MODEL.SPATIOTEMPORAL.ROI_BOX_HEAD.ATTENTION.GROUP
        self.feat_dim = fc_dim
        self.base_stage = cfg.MODEL.SPATIOTEMPORAL.ROI_BOX_HEAD.ATTENTION.STAGE
        self.advanced_stage = cfg.MODEL.SPATIOTEMPORAL.ROI_BOX_HEAD.ATTENTION.ADVANCED_STAGE
        self.base_num = cfg.MODEL.SPATIOTEMPORAL.ROI_BOX_HEAD.REF_POST_NMS_TOP_N
        self.advanced_num = int(
            self.base_num * cfg.MODEL.SPATIOTEMPORAL.ROI_BOX_HEAD.RDN_RATIO)
        self.location_free = cfg.MODEL.SPATIOTEMPORAL.ROI_BOX_HEAD.ATTENTION.LOCATION_FREE
        # fmt: on

        self._output_size = (input_shape.channels, input_shape.height,
                             input_shape.width)
        input_size = np.prod(
            (input_shape.channels, input_shape.height, input_shape.width))

        fcs, Wgs, Wqs, Wks, Wvs = [], [], [], [], []
        for i in range(self.base_stage + self.advanced_stage + 1):
            r_size = input_size if i == 0 else fc_dim

            if i == self.base_stage and self.advanced_stage == 0:
                break

            if i != self.base_stage + self.advanced_stage:
                fcs.append(make_fc(r_size, fc_dim))
                self._output_size = fc_dim
            Wgs.append(
                Conv2d(self.embed_dim,
                       self.groups,
                       kernel_size=1,
                       stride=1,
                       padding=0))
            Wqs.append(make_fc(self.feat_dim, self.feat_dim))
            Wks.append(make_fc(self.feat_dim, self.feat_dim))
            Wvs.append(
                Conv2d(self.feat_dim * self.groups,
                       self.feat_dim,
                       kernel_size=1,
                       stride=1,
                       padding=0,
                       groups=self.groups))
            for layer in [Wgs[i], Wvs[i]]:
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)
        self.fcs = nn.ModuleList(fcs)
        self.Wgs = nn.ModuleList(Wgs)
        self.Wqs = nn.ModuleList(Wqs)
        self.Wks = nn.ModuleList(Wks)
        self.Wvs = nn.ModuleList(Wvs)
Exemple #23
0
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
    ):
        """
        Args:
            bottleneck_channels (int): number of output channels for the 3x3
                "bottleneck" conv layers.
            num_groups (int): number of groups for the 3x3 conv layer.
            norm (str or callable): normalization for all conv layers.
                See :func:`layers.get_norm` for supported format.
            stride_in_1x1 (bool): when stride>1, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
            dilation (int): the dilation rate of the 3x3 conv layer.
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        self.triplet_attention = TripletAttention(in_channels)

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
Exemple #24
0
def SpectralNormConv2d(*args, **kwargs):
    return torch.nn.utils.spectral_norm(Conv2d(*args, **kwargs))
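torch.nn.utils.spectral_norm wraps the module so that, at each forward, the weight is divided by its largest singular value (estimated by power iteration), bounding the layer's Lipschitz constant; this is common in GAN discriminators. Usage is identical to a plain Conv2d:

disc_conv = SpectralNormConv2d(64, 128, kernel_size=4, stride=2, padding=1)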
Exemple #25
0
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
        deform_modulated=False,
        deform_num_groups=1,
    ):
        """
        Similar to :class:`BottleneckBlock`, but with deformable conv in the 3x3 convolution.
        """
        super().__init__(in_channels, out_channels, stride)
        self.deform_modulated = deform_modulated

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        if deform_modulated:
            deform_conv_op = ModulatedDeformConv
            # 2 offset channels per kernel position, plus 1 modulation mask
            # channel: (2 + 1) * 3 * 3 = 27
            offset_channels = 27
        else:
            deform_conv_op = DeformConv
            # 2 offset channels per kernel position: 2 * 3 * 3 = 18
            offset_channels = 18

        self.conv2_offset = Conv2d(
            bottleneck_channels,
            offset_channels * deform_num_groups,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            dilation=dilation,
        )
        self.conv2 = deform_conv_op(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            deformable_groups=deform_num_groups,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)

        nn.init.constant_(self.conv2_offset.weight, 0)
        nn.init.constant_(self.conv2_offset.bias, 0)
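In the forward pass, conv2_offset predicts the sampling offsets (and, when modulated, the masks) that conv2 consumes; the split below follows detectron2's standard DeformBottleneckBlock:

if self.deform_modulated:
    offset_mask = self.conv2_offset(out)
    # Channels are [x offsets | y offsets | modulation masks].
    offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1)
    offset = torch.cat((offset_x, offset_y), dim=1)
    out = self.conv2(out, offset, mask.sigmoid())
else:
    offset = self.conv2_offset(out)
    out = self.conv2(out, offset)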
Exemple #26
0
    def __init__(
        self,
        input_shape: Dict[str, ShapeSpec],
        *,
        in_features: List[str],
        num_classes: int,
        conv_dims: int,
        common_stride: int,
        loss_weight: float = 1.0,
        norm: Optional[Union[str, Callable]] = None,
        ignore_value: int = -1
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape: shapes of the input features
            in_features: a list of input feature names to use
            num_classes: number of classes to predict
            conv_dims: number of output channels for the intermediate conv layers.
            common_stride: the common stride that all features will be upscaled to
            loss_weight: loss weight
            norm (str or callable): normalization for all conv layers
            ignore_value: category id to be ignored during training.
        """
        super().__init__()
        feature_strides = {k: v.stride for k, v in input_shape.items()}
        feature_channels = {k: v.channels for k, v in input_shape.items()}
        self.in_features = in_features
        self.ignore_value = ignore_value
        self.common_stride = common_stride
        self.loss_weight = loss_weight

        self.scale_heads = []
        for in_feature in self.in_features:
            head_ops = []
            head_length = max(
                1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
            )
            for k in range(head_length):
                norm_module = get_norm(norm, conv_dims)
                conv = Conv2d(
                    feature_channels[in_feature] if k == 0 else conv_dims,
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,
                    norm=norm_module,
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                head_ops.append(conv)
                if feature_strides[in_feature] != self.common_stride:
                    head_ops.append(
                        nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
                    )
            self.scale_heads.append(nn.Sequential(*head_ops))
            self.add_module(in_feature, self.scale_heads[-1])
        self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
        weight_init.c2_msra_fill(self.predictor)
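Each scale head brings one input feature up to the common stride, where the features are summed and classified; a sketch consistent with this __init__ (mirroring detectron2's SemSegFPNHead):

def layers(self, features):
    x = None
    for i, f in enumerate(self.in_features):
        y = self.scale_heads[i](features[f])   # all at common_stride now
        x = y if x is None else x + y
    return self.predictor(x)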
Exemple #27
0
    def __init__(
        self,
        input_shape: Dict[str, ShapeSpec],
        *,
        decoder_channels: List[int],
        norm: Union[str, Callable],
        head_channels: int,
        loss_weight: float,
        loss_type: str,
        loss_top_k: float,
        ignore_value: int,
        num_classes: int,
        **kwargs,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature
            decoder_channels (list[int]): a list of output channels of each
                decoder stage. It should have the same length as "in_features"
                (each element in "in_features" corresponds to one decoder stage).
            norm (str or callable): normalization for all conv layers.
            head_channels (int): the output channels of extra convolutions
                between decoder and predictor.
            loss_weight (float): loss weight.
            loss_top_k (float): the top k percent of hardest pixels to keep for
                the "hard_pixel_mining" loss.
            loss_type, ignore_value, num_classes: the same as the base class.
        """
        super().__init__(
            input_shape,
            decoder_channels=decoder_channels,
            norm=norm,
            ignore_value=ignore_value,
            **kwargs,
        )
        assert self.decoder_only

        self.loss_weight = loss_weight
        use_bias = norm == ""
        # `head` is additional transform before predictor
        if self.use_depthwise_separable_conv:
            # We use a single 5x5 DepthwiseSeparableConv2d to replace
            # 2 3x3 Conv2d since they have the same receptive field.
            self.head = DepthwiseSeparableConv2d(
                decoder_channels[0],
                head_channels,
                kernel_size=5,
                padding=2,
                norm1=norm,
                activation1=F.relu,
                norm2=norm,
                activation2=F.relu,
            )
        else:
            self.head = nn.Sequential(
                Conv2d(
                    decoder_channels[0],
                    decoder_channels[0],
                    kernel_size=3,
                    padding=1,
                    bias=use_bias,
                    norm=get_norm(norm, decoder_channels[0]),
                    activation=F.relu,
                ),
                Conv2d(
                    decoder_channels[0],
                    head_channels,
                    kernel_size=3,
                    padding=1,
                    bias=use_bias,
                    norm=get_norm(norm, head_channels),
                    activation=F.relu,
                ),
            )
            weight_init.c2_xavier_fill(self.head[0])
            weight_init.c2_xavier_fill(self.head[1])
        self.predictor = Conv2d(head_channels, num_classes, kernel_size=1)
        nn.init.normal_(self.predictor.weight, 0, 0.001)
        nn.init.constant_(self.predictor.bias, 0)

        if loss_type == "cross_entropy":
            self.loss = nn.CrossEntropyLoss(reduction="mean",
                                            ignore_index=ignore_value)
        elif loss_type == "hard_pixel_mining":
            self.loss = DeepLabCE(ignore_label=ignore_value,
                                  top_k_percent_pixels=loss_top_k)
        else:
            raise ValueError("Unexpected loss type: %s" % loss_type)
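Both loss branches score (N, C, H, W) logits against (N, H, W) integer targets; a minimal usage sketch (shapes only):

logits = torch.randn(2, num_classes, 64, 64)
targets = torch.randint(0, num_classes, (2, 64, 64))
loss = self.loss(logits, targets) * self.loss_weight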
Exemple #28
0
    def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
        super(DecoderGlobal, self).__init__()

        # fmt: off
        self.in_features = in_features
        feature_strides = {k: v.stride for k, v in input_shape.items()}
        feature_channels = {k: v.channels for k, v in input_shape.items()}
        # num_classes           = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
        num_classes = 77
        conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
        self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
        norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
        # fmt: on

        self.scale_heads = []
        for in_feature in self.in_features:
            head_ops = []
            head_length = max(
                1,
                int(
                    np.log2(feature_strides[in_feature]) -
                    np.log2(self.common_stride)))
            for k in range(head_length):
                conv = Conv2d(
                    feature_channels[in_feature] if k == 0 else conv_dims,
                    conv_dims,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=not norm,
                    norm=get_norm(norm, conv_dims),
                    activation=F.relu,
                )
                weight_init.c2_msra_fill(conv)
                head_ops.append(conv)
                if feature_strides[in_feature] != self.common_stride:
                    head_ops.append(
                        nn.Upsample(scale_factor=2,
                                    mode="bilinear",
                                    align_corners=False))
            self.scale_heads.append(nn.Sequential(*head_ops))
            self.add_module(in_feature, self.scale_heads[-1])

        # self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)

        self.densepose_head = build_densepose_head(cfg, conv_dims)

        # predictor = []
        # for k in range(7):
        #     conv = Conv2d(
        #         conv_dims,
        #         conv_dims,
        #         kernel_size=3,
        #         stride=1,
        #         padding=1,
        #         bias=not norm,
        #         norm=get_norm(norm, conv_dims),
        #         activation=F.relu,
        #     )
        #     weight_init.c2_msra_fill(conv)
        #     predictor.append(conv)

        # conv = ConvTranspose2d(
        #     conv_dims, num_classes, 3, stride=2, padding=1
        # )
        # weight_init.c2_msra_fill(conv)
        # predictor.append(conv)
        # self.add_module("predictor", nn.Sequential(*predictor))

        self.predictor = Conv2d(cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM,
                                num_classes,
                                1,
                                stride=1,
                                padding=0)
Exemple #29
0
    def __init__(
        self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum"
    ):
        """
        Args:
            bottom_up (Backbone): module representing the bottom up subnetwork.
                Must be a subclass of :class:`Backbone`. The multi-scale feature
                maps generated by the bottom up network, and listed in `in_features`,
                are used to generate FPN levels.
            in_features (list[str]): names of the input feature maps coming
                from the backbone to which FPN is attached. For example, if the
                backbone produces ["res2", "res3", "res4"], any *contiguous* sublist
                of these may be used; order must be from high to low resolution.
            out_channels (int): number of channels in the output feature maps.
            norm (str): the normalization to use.
            top_block (nn.Module or None): if provided, an extra operation will
                be performed on the output of the last (smallest resolution)
                FPN output, and the result will extend the result list. The top_block
                further downsamples the feature map. It must have an attribute
                "num_levels", meaning the number of extra FPN levels added by
                this block, and "in_feature", which is a string representing
                its input feature (e.g., p5).
            fuse_type (str): how to fuse the top-down features with the lateral
                ones. It can be "sum" (default), which adds them element-wise, or
                "avg", which takes their element-wise mean.
        """
        super(FPN, self).__init__()
        assert isinstance(bottom_up, Backbone)

        # Feature map strides and channels from the bottom up network (e.g. ResNet)
        input_shapes = bottom_up.output_shape()
        in_strides = [input_shapes[f].stride for f in in_features]
        in_channels = [input_shapes[f].channels for f in in_features]

        _assert_strides_are_log2_contiguous(in_strides)
        lateral_convs = []
        output_convs = []

        use_bias = norm == ""
        # `in_channels` is a list here; use a distinct name for the per-feature value.
        for idx, in_channels_per_feature in enumerate(in_channels):
            lateral_norm = get_norm(norm, out_channels)
            output_norm = get_norm(norm, out_channels)

            lateral_conv = Conv2d(
                in_channels_per_feature, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm
            )
            output_conv = Conv2d(
                out_channels,
                out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=use_bias,
                norm=output_norm,
            )
            weight_init.c2_xavier_fill(lateral_conv)
            weight_init.c2_xavier_fill(output_conv)
            stage = int(math.log2(in_strides[idx]))
            self.add_module("fpn_lateral{}".format(stage), lateral_conv)
            self.add_module("fpn_output{}".format(stage), output_conv)

            lateral_convs.append(lateral_conv)
            output_convs.append(output_conv)
        # Place convs into top-down order (from low to high resolution)
        # to make the top-down computation in forward clearer.
        self.lateral_convs = lateral_convs[::-1]
        self.output_convs = output_convs[::-1]
        self.top_block = top_block
        self.in_features = in_features
        self.bottom_up = bottom_up
        # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"]
        self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in in_strides}
        # top block output feature maps.
        if self.top_block is not None:
            for s in range(stage, stage + self.top_block.num_levels):
                self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1)

        self._out_features = list(self._out_feature_strides.keys())
        self._out_feature_channels = {k: out_channels for k in self._out_features}
        self._size_divisibility = in_strides[-1]
        assert fuse_type in {"avg", "sum"}
        self._fuse_type = fuse_type
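A hedged usage sketch, assuming the usual detectron2 names (LastLevelMaxPool adds a stride-64 "p6" on top of "p5"):

fpn = FPN(
    bottom_up=resnet,  # any Backbone producing the listed features
    in_features=["res2", "res3", "res4", "res5"],
    out_channels=256,
    norm="",
    top_block=LastLevelMaxPool(),
    fuse_type="sum",
)
features = fpn(x)  # dict like {"p2": ..., "p3": ..., "p4": ..., "p5": ..., "p6": ...}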
Exemple #30
0
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
        deform_modulated=False,
        deform_num_groups=1,
        avd=False,
        avg_down=False,
        radix=2,
        bottleneck_width=64,
    ):
        """
        Similar to :class:`BottleneckBlock`, but with deformable conv in the 3x3
        convolution and optional ResNeSt-style features (split-attention via
        `radix`, plus `avd` and `avg_down`).
        """
        super().__init__(in_channels, out_channels, stride)
        self.deform_modulated = deform_modulated
        self.avd = avd and (stride > 1)
        self.avg_down = avg_down
        self.radix = radix

        cardinality = num_groups
        group_width = int(bottleneck_channels *
                          (bottleneck_width / 64.)) * cardinality

        if in_channels != out_channels:
            if self.avg_down:
                self.shortcut_avgpool = nn.AvgPool2d(kernel_size=stride,
                                                     stride=stride,
                                                     ceil_mode=True,
                                                     count_include_pad=False)
                self.shortcut = Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=1,
                    bias=False,
                    norm=get_norm(norm, out_channels),
                )
            else:
                self.shortcut = Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=stride,
                    bias=False,
                    norm=get_norm(norm, out_channels),
                )
        else:
            self.shortcut = None

        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)
        self.stride_1x1 = stride_1x1
        self.stride_3x3 = stride_3x3

        self.conv1 = Conv2d(
            in_channels,
            group_width,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, group_width),
        )

        if deform_modulated:
            deform_conv_op = ModulatedDeformConv
            # 2 offset channels per kernel position, plus 1 modulation mask
            # channel: (2 + 1) * 3 * 3 = 27
            offset_channels = 27
        else:
            deform_conv_op = DeformConv
            # 2 offset channels per kernel position: 2 * 3 * 3 = 18
            offset_channels = 18

        self.conv2_offset = Conv2d(
            bottleneck_channels,
            offset_channels * deform_num_groups,
            kernel_size=3,
            stride=1 if self.avd else stride_3x3,
            padding=1 * dilation,
            dilation=dilation,
            groups=deform_num_groups,
        )
        if self.radix > 1:
            from .splat import SplAtConv2d_dcn
            self.conv2 = SplAtConv2d_dcn(
                group_width,
                group_width,
                kernel_size=3,
                stride=1 if self.avd else stride_3x3,
                padding=dilation,
                dilation=dilation,
                groups=cardinality,
                bias=False,
                radix=self.radix,
                norm=norm,
                deform_conv_op=deform_conv_op,
                deformable_groups=deform_num_groups,
                deform_modulated=deform_modulated,
            )
        else:
            self.conv2 = deform_conv_op(
                bottleneck_channels,
                bottleneck_channels,
                kernel_size=3,
                stride=1 if self.avd else stride_3x3,
                padding=1 * dilation,
                bias=False,
                groups=num_groups,
                dilation=dilation,
                deformable_groups=deform_num_groups,
                norm=get_norm(norm, bottleneck_channels),
            )

        if self.avd:
            self.avd_layer = nn.AvgPool2d(3, stride, padding=1)

        self.conv3 = Conv2d(
            group_width,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        if self.radix > 1:
            for layer in [self.conv1, self.conv3, self.shortcut]:
                if layer is not None:  # shortcut can be None
                    weight_init.c2_msra_fill(layer)
        else:
            for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
                if layer is not None:  # shortcut can be None
                    weight_init.c2_msra_fill(layer)

        nn.init.constant_(self.conv2_offset.weight, 0)
        nn.init.constant_(self.conv2_offset.bias, 0)