def __init__(self, in_channels, out_channels, *, stride=1, norm="BN"):
    """
    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        stride (int): Stride for the first conv.
        norm (str or callable): normalization for all conv layers.
            See :func:`layers.get_norm` for supported format.
    """
    super().__init__(in_channels, out_channels, stride)

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    self.conv2 = Conv2d(
        out_channels,
        out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)
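# Usage sketch (added, not from the original source). This __init__ matches the
# basic two-conv residual block in detectron2 (detectron2.modeling.backbone.resnet.BasicBlock);
# the class name and import path below are assumptions based on that resemblance.
def _example_basic_block():
    import torch
    from detectron2.modeling.backbone.resnet import BasicBlock

    # stride=2 with a channel change, so the 1x1 projection shortcut is built
    block = BasicBlock(64, 128, stride=2, norm="BN")
    x = torch.randn(2, 64, 56, 56)
    y = block(x)  # residual forward: conv1 -> conv2, plus projected shortcut
    assert y.shape == (2, 128, 28, 28)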
def __init__(self, cfg, input_shape: ShapeSpec):
    """
    The following attributes are parsed from config:
        num_conv, num_fc: the number of conv/fc layers
        conv_dim/fc_dim: the dimension of the conv/fc layers
        norm: normalization for the conv layers
    """
    super().__init__()

    # fmt: off
    num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV
    conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM
    num_fc   = cfg.MODEL.ROI_BOX_HEAD.NUM_FC
    fc_dim   = cfg.MODEL.ROI_BOX_HEAD.FC_DIM
    norm     = cfg.MODEL.ROI_BOX_HEAD.NORM
    # fmt: on
    assert num_conv + num_fc > 0

    self._output_size = (input_shape.channels, input_shape.height, input_shape.width)

    self.conv_norm_relus = []
    for k in range(num_conv):
        conv = Conv2d(
            self._output_size[0],
            conv_dim,
            kernel_size=3,
            padding=1,
            bias=not norm,
            norm=get_norm(norm, conv_dim),
            activation=F.relu,
        )
        self.add_module("conv{}".format(k + 1), conv)
        self.conv_norm_relus.append(conv)
        self._output_size = (conv_dim, self._output_size[1], self._output_size[2])

    self.fcs = []
    for k in range(num_fc):
        fc = nn.Linear(np.prod(self._output_size), fc_dim)
        self.add_module("fc{}".format(k + 1), fc)
        self.fcs.append(fc)
        self._output_size = fc_dim

    for layer in self.conv_norm_relus:
        weight_init.c2_msra_fill(layer)
    for layer in self.fcs:
        weight_init.c2_xavier_fill(layer)
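# Usage sketch (added, not from the original source). The cfg keys above match
# detectron2's FastRCNNConvFCHead (detectron2.modeling.roi_heads.box_head); the
# class name and import path are assumptions based on that resemblance.
def _example_box_head():
    import torch
    from detectron2.config import get_cfg
    from detectron2.layers import ShapeSpec
    from detectron2.modeling.roi_heads.box_head import FastRCNNConvFCHead

    cfg = get_cfg()
    cfg.MODEL.ROI_BOX_HEAD.NUM_CONV = 0  # set explicitly; num_conv + num_fc must be > 0
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.ROI_BOX_HEAD.FC_DIM = 1024

    # 7x7 RoI-pooled features with 256 channels, as used with an FPN backbone
    head = FastRCNNConvFCHead(cfg, ShapeSpec(channels=256, height=7, width=7))
    x = torch.randn(2, 256, 7, 7)
    y = head(x)  # flattened, then passed through the two fc layers
    assert y.shape == (2, 1024)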
def __init__(self, in_channels=3, out_channels=64, norm="BN"):
    """
    Args:
        norm (str or callable): a callable that takes the number of
            channels and return a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
    """
    super().__init__()
    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
    weight_init.c2_msra_fill(self.conv1)
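# Usage sketch (added, not from the original source). This stem matches
# detectron2's BasicStem (detectron2.modeling.backbone.resnet); the name, import
# path, and the max-pool in forward are assumptions based on that resemblance.
def _example_basic_stem():
    import torch
    from detectron2.modeling.backbone.resnet import BasicStem

    stem = BasicStem(in_channels=3, out_channels=64, norm="BN")
    x = torch.randn(1, 3, 224, 224)
    y = stem(x)  # 7x7 stride-2 conv, then a 3x3 stride-2 max-pool in forward
    assert y.shape == (1, 64, 56, 56)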
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    stride_in_1x1=False,
    dilation=1,
    deform_modulated=False,
    deform_num_groups=1,
):
    """
    Similar to :class:`BottleneckBlock`, but with deformable conv in the 3x3 convolution.
    """
    super().__init__(in_channels, out_channels, stride)
    self.deform_modulated = deform_modulated

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )

    if deform_modulated:
        deform_conv_op = ModulatedDeformConv
        # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size
        offset_channels = 27
    else:
        deform_conv_op = DeformConv
        offset_channels = 18

    self.conv2_offset = Conv2d(
        bottleneck_channels,
        offset_channels * deform_num_groups,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        dilation=dilation,
    )
    self.conv2 = deform_conv_op(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        deformable_groups=deform_num_groups,
        norm=get_norm(norm, bottleneck_channels),
    )

    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)

    nn.init.constant_(self.conv2_offset.weight, 0)
    nn.init.constant_(self.conv2_offset.bias, 0)
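# Usage sketch (added, not from the original source). This block matches
# detectron2's DeformBottleneckBlock (detectron2.modeling.backbone.resnet); the
# class name and import path are assumptions, and detectron2's deformable conv
# kernels generally require a CUDA build, so the forward pass is guarded.
def _example_deform_bottleneck_block():
    import torch
    from detectron2.modeling.backbone.resnet import DeformBottleneckBlock

    block = DeformBottleneckBlock(
        256, 256, bottleneck_channels=64, norm="BN",
        deform_modulated=True, deform_num_groups=1,
    )
    if torch.cuda.is_available():
        x = torch.randn(2, 256, 32, 32, device="cuda")
        y = block.cuda()(x)  # conv2_offset predicts offsets/masks for the deformable 3x3
        assert y.shape == (2, 256, 32, 32)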
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    stride_in_1x1=False,
    dilation=1,
):
    """
    Args:
        norm (str or callable): a callable that takes the number of
            channels and return a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
        stride_in_1x1 (bool): when stride==2, whether to put stride in the
            first 1x1 convolution or the bottleneck 3x3 convolution.
    """
    super().__init__(in_channels, out_channels, stride)

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )

    self.conv2 = Conv2d(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        norm=get_norm(norm, bottleneck_channels),
    )

    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)
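# Usage sketch (added, not from the original source). This is the standard
# ResNet bottleneck block; in detectron2 it is BottleneckBlock in
# detectron2.modeling.backbone.resnet, which the import below assumes.
def _example_bottleneck_block():
    import torch
    from detectron2.modeling.backbone.resnet import BottleneckBlock

    # First block of a ResNet-50 "res3" stage: downsample by 2, expand 256 -> 512 channels.
    block = BottleneckBlock(
        256, 512, bottleneck_channels=128, stride=2, norm="BN", stride_in_1x1=True
    )
    x = torch.randn(2, 256, 56, 56)
    y = block(x)
    assert y.shape == (2, 512, 28, 28)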
def __init__(
    self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum"
):
    """
    Args:
        bottom_up (Backbone): module representing the bottom up subnetwork.
            Must be a subclass of :class:`Backbone`. The multi-scale feature
            maps generated by the bottom up network, and listed in `in_features`,
            are used to generate FPN levels.
        in_features (list[str]): names of the input feature maps coming
            from the backbone to which FPN is attached. For example, if the
            backbone produces ["res2", "res3", "res4"], any *contiguous* sublist
            of these may be used; order must be from high to low resolution.
        out_channels (int): number of channels in the output feature maps.
        norm (str): the normalization to use.
        top_block (nn.Module or None): if provided, an extra operation will
            be performed on the output of the last (smallest resolution)
            FPN output, and the result will extend the result list. The top_block
            further downsamples the feature map. It must have an attribute
            "num_levels", meaning the number of extra FPN levels added by
            this block, and "in_feature", which is a string representing
            its input feature (e.g., p5).
        fuse_type (str): types for fusing the top down features and the lateral
            ones. It can be "sum" (default), which sums up element-wise; or "avg",
            which takes the element-wise mean of the two.
    """
    super(FPN, self).__init__()
    assert isinstance(bottom_up, Backbone)

    # Feature map strides and channels from the bottom up network (e.g. ResNet)
    in_strides = [bottom_up.out_feature_strides[f] for f in in_features]
    in_channels_per_feature = [bottom_up.out_feature_channels[f] for f in in_features]

    _assert_strides_are_log2_contiguous(in_strides)
    lateral_convs = []
    output_convs = []

    use_bias = norm == ""
    for idx, in_channels in enumerate(in_channels_per_feature):
        lateral_norm = get_norm(norm, out_channels)
        output_norm = get_norm(norm, out_channels)

        lateral_conv = Conv2d(
            in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm
        )
        output_conv = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=use_bias,
            norm=output_norm,
        )
        weight_init.c2_xavier_fill(lateral_conv)
        weight_init.c2_xavier_fill(output_conv)
        stage = int(math.log2(in_strides[idx]))
        self.add_module("fpn_lateral{}".format(stage), lateral_conv)
        self.add_module("fpn_output{}".format(stage), output_conv)

        lateral_convs.append(lateral_conv)
        output_convs.append(output_conv)
    # Place convs into top-down order (from low to high resolution)
    # to make the top-down computation in forward clearer.
    self.lateral_convs = lateral_convs[::-1]
    self.output_convs = output_convs[::-1]
    self.top_block = top_block
    self.in_features = in_features
    self.bottom_up = bottom_up
    # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"]
    self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in in_strides}
    # top block output feature maps.
    if self.top_block is not None:
        for s in range(stage, stage + self.top_block.num_levels):
            self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1)

    self._out_features = list(self._out_feature_strides.keys())
    self._out_feature_channels = {k: out_channels for k in self._out_features}
    self._size_divisibility = in_strides[-1]
    assert fuse_type in {"avg", "sum"}
    self._fuse_type = fuse_type
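# Usage sketch (added, not from the original source). This is detectron2's FPN;
# the helpers build_resnet_backbone and LastLevelMaxPool used below are part of
# detectron2's public API but are assumptions as far as this file is concerned.
def _example_fpn():
    import torch
    from detectron2.config import get_cfg
    from detectron2.layers import ShapeSpec
    from detectron2.modeling.backbone.resnet import build_resnet_backbone
    from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool

    cfg = get_cfg()
    cfg.MODEL.RESNETS.OUT_FEATURES = ["res2", "res3", "res4", "res5"]
    bottom_up = build_resnet_backbone(cfg, ShapeSpec(channels=3))

    fpn = FPN(
        bottom_up=bottom_up,
        in_features=["res2", "res3", "res4", "res5"],
        out_channels=256,
        norm="",
        top_block=LastLevelMaxPool(),  # adds one extra "p6" level on top of p5
        fuse_type="sum",
    )
    outs = fpn(torch.randn(1, 3, 224, 224))
    assert sorted(outs.keys()) == ["p2", "p3", "p4", "p5", "p6"]
    assert all(t.shape[1] == 256 for t in outs.values())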